265 files changed, 16166 insertions, 337 deletions
@@ -43,6 +43,11 @@ EXTRA_DIST := test unittests llvm.spec include win32 Xcode include $(LEVEL)/Makefile.config +ifeq ($(NACL_SANDBOX),1) + DIRS := $(filter-out tools/llvm-shlib runtime docs unittests, $(DIRS)) + OPTIONAL_DIRS := +endif + ifneq ($(ENABLE_SHARED),1) DIRS := $(filter-out tools/llvm-shlib, $(DIRS)) endif @@ -121,6 +126,7 @@ cross-compile-build-tools: fi; \ ($(MAKE) -C BuildTools \ BUILD_DIRS_ONLY=1 \ + NACL_SANDBOX=0 \ UNIVERSAL= \ UNIVERSAL_SDK_PATH= \ SDKROOT= \ diff --git a/Makefile.rules b/Makefile.rules index b2b02c25d4..51accc512b 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -655,6 +655,23 @@ else endif endif +ifeq ($(NACL_SANDBOX),1) + # NOTE: we specify --noirt to tell the driver that we should link + # against private (non-stable, non-IRT) libraries for the + # sandboxed translator. This could have been specified directly, + # except that LLVM slips in -lpthread elsewhere in the build system, + # and we need it to use -lpthread_private instead. + LIBS += -Wl,--noirt -lsrpc -limc_syscalls -lplatform -lgio -lpthread \ + -lm -lnacl -lnacl_dyncode -lnosys + ifeq ($(USE_TCMALLOC),1) + # Note: -ltcmalloc_minimal needs to stay last on the link line + LIBS += -ltcmalloc_minimal + CXX.Flags += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + C.Flags += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + endif +else + LIBS += +endif #---------------------------------------------------------- # Options To Invoke Tools @@ -1239,8 +1256,10 @@ endif $(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(SharedLibDir)/.dir $(Echo) Linking $(BuildMode) $(SharedLibKindMessage) \ $(notdir $@) + # @LOCALMOD: the EXTRA_LIBS hack is necessary for LLVMgold.so + # c.f. llvm/tools/gold/Makefile $(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO) \ - $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS) + $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS) $(EXTRA_LIBS) else $(LibName.SO): $(ObjectsO) $(SharedLibDir)/.dir $(Echo) Linking $(BuildMode) Shared Library $(notdir $@) diff --git a/OWNERS b/OWNERS new file mode 100644 index 0000000000..3f2cc43ac7 --- /dev/null +++ b/OWNERS @@ -0,0 +1,7 @@ +dschuff@chromium.org +eliben@chromium.org +jvoung@chromium.org +mseaborn@chromium.org +robertm@chromium.org +sehr@chromium.org + diff --git a/PRESUBMIT.py b/PRESUBMIT.py new file mode 100644 index 0000000000..af60ae4f1e --- /dev/null +++ b/PRESUBMIT.py @@ -0,0 +1,66 @@ +# Copyright (c) 2012 The Native Client Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# Documentation on PRESUBMIT.py can be found at: +# http://www.chromium.org/developers/how-tos/depottools/presubmit-scripts + +EXCLUDE_PROJECT_CHECKS_DIRS = [ '.' ] + +import subprocess +def CheckGitBranch(): + p = subprocess.Popen("git branch -vv", shell=True, + stdout=subprocess.PIPE) + output, _ = p.communicate() + + lines = output.split('\n') + for line in lines: + # output format for checked-out branch should be + # * branchname hash [TrackedBranchName ... + toks = line.split() + if '*' not in toks[0]: + continue + if not 'origin/master' in toks[3]: + warning = 'Warning: your current branch:\n' + line + warning += '\nis not tracking origin/master. git cl push may silently ' + warning += 'fail to push your change. 
To fix this, do\n' + warning += 'git branch --set-upstream '+ toks[1] + ' origin/master' + return warning + return None + print 'Warning: presubmit check could not determine local git branch' + return None + +def _CommonChecks(input_api, output_api): + """Checks for both upload and commit.""" + results = [] + results.extend(input_api.canned_checks.PanProjectChecks( + input_api, output_api, project_name='Native Client', + excluded_paths=tuple(EXCLUDE_PROJECT_CHECKS_DIRS))) + branch_warning = CheckGitBranch() + if branch_warning: + results.append(output_api.PresubmitPromptWarning(branch_warning)) + return results + +def CheckChangeOnUpload(input_api, output_api): + """Verifies all changes in all files. + Args: + input_api: the limited set of input modules allowed in presubmit. + output_api: the limited set of output modules allowed in presubmit. + """ + report = [] + report.extend(_CommonChecks(input_api, output_api)) + return report + +def CheckChangeOnCommit(input_api, output_api): + """Verifies all changes in all files and verifies that the + tree is open and can accept a commit. + Args: + input_api: the limited set of input modules allowed in presubmit. + output_api: the limited set of output modules allowed in presubmit. + """ + report = [] + report.extend(CheckChangeOnUpload(input_api, output_api)) + return report + +def GetPreferredTrySlaves(project, change): + return [] diff --git a/autoconf/config.sub b/autoconf/config.sub index 9942491533..a4f411f6c6 100755 --- a/autoconf/config.sub +++ b/autoconf/config.sub @@ -239,6 +239,10 @@ case $os in basic_machine=m68k-atari os=-mint ;; + -nacl*) + basic_machine=i686-pc + os=-nacl + ;; esac # Decode aliases for certain CPU-COMPANY combinations. @@ -347,6 +351,14 @@ case $basic_machine in i*86 | x86_64) basic_machine=$basic_machine-pc ;; + nacl64*) + basic_machine=x86_64-pc + os=-nacl + ;; + nacl*) + basic_machine=i686-pc + os=-nacl + ;; # Object if more than one company name word. *-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 @@ -1364,6 +1376,9 @@ case $os in ;; esac ;; + -nacl*) + os=-nacl + ;; -nto-qnx*) ;; -nto*) diff --git a/autoconf/configure.ac b/autoconf/configure.ac index f1842a6d8a..13134589af 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -266,6 +266,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on], llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="Freestanding" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; *) llvm_cv_link_all_option="" llvm_cv_no_link_all_option="" diff --git a/codereview.settings b/codereview.settings new file mode 100644 index 0000000000..1940586a7f --- /dev/null +++ b/codereview.settings @@ -0,0 +1,10 @@ +# This file is used by gcl to get repository specific information. 
+CODE_REVIEW_SERVER: codereview.chromium.org +CC_LIST: native-client-reviews@googlegroups.com +VIEW_VC: https://gerrit.chromium.org/gerrit/gitweb?p=native_client/pnacl-llvm.git;a=commit;h= +STATUS: http://nativeclient-status.appspot.com/status +TRY_ON_UPLOAD: False +TRYSERVER_PROJECT: nacl +TRYSERVER_SVN_URL: svn://svn.chromium.org/chrome-try/try-nacl +PUSH_URL_CONFIG: url.ssh://gerrit.chromium.org.pushinsteadof +ORIGIN_URL_CONFIG: http://git.chromium.org @@ -3784,6 +3784,11 @@ else llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="Freestanding" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; *) llvm_cv_link_all_option="" llvm_cv_no_link_all_option="" diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index f43d365e3d..864870bfe7 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -60,6 +60,13 @@ typedef enum { LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC = 2 } lto_codegen_model; +/* @LOCALMOD-BEGIN */ +typedef enum { + LTO_OUTPUT_FORMAT_OBJECT = 0, /* object file */ + LTO_OUTPUT_FORMAT_SHARED = 1, /* shared library */ + LTO_OUTPUT_FORMAT_EXEC = 2 /* executable */ +} lto_output_format; +/* @LOCALMOD-END */ /** opaque reference to a loaded object module */ typedef struct LTOModule* lto_module_t; @@ -71,6 +78,17 @@ typedef struct LTOCodeGenerator* lto_code_gen_t; extern "C" { #endif + +/* @LOCALMOD-BEGIN */ + +/* Add a command-line option */ +void lto_add_command_line_option(const char* opt); + +/* Parse command line options */ +void lto_parse_command_line_options(); + +/* @LOCALMOD-END */ + /** * Returns a printable string. */ @@ -165,6 +183,36 @@ lto_module_get_target_triple(lto_module_t mod); extern void lto_module_set_target_triple(lto_module_t mod, const char *triple); +/* @LOCALMOD-BEGIN */ + +/** + * Get the module format for this module + */ +extern lto_output_format +lto_module_get_output_format(lto_module_t mod); + +/** + * Get the module soname + */ +extern const char* +lto_module_get_soname(lto_module_t mod); + + +/** + * Get the i'th library dependency. + * Returns NULL if i >= lto_module_get_num_library_deps() + */ +extern const char* +lto_module_get_library_dep(lto_module_t mod, unsigned int i); + + +/** + * Return the number of library dependencies of this module. + */ +extern unsigned int +lto_module_get_num_library_deps(lto_module_t mod); + +/* @LOCALMOD-END */ /** * Returns the number of symbols in the object module. @@ -211,7 +259,26 @@ lto_codegen_dispose(lto_code_gen_t); extern bool lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod); +/* @LOCALMOD-BEGIN */ +/** + * Add an object module to the set of modules for which code will be generated. + * This does not merge the module immediately, unlike lto_codegen_add_module. + * It will hold onto the module until the user calls + * lto_codegen_link_gathered_modules_and_dispose(). The lto_module_t + * should now by owned by the lto_code_gen_t, and will be freed when + * the link is done. + */ +extern bool +lto_codegen_gather_module_for_link(lto_code_gen_t cg, lto_module_t mod); +/** + * Merges modules that are part of the set of modules gathered by + * lto_codegen_gather_module_for_link(), and the also destroys the modules + * as lto_module_dispose() would. + */ +extern bool +lto_codegen_link_gathered_modules_and_dispose(lto_code_gen_t cg); +/* @LOCALMOD-END*/ /** * Sets if debug info should be generated. 
@@ -258,6 +325,56 @@ lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args, extern void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol); +/* @LOCALMOD-BEGIN */ + +/** + * Sets the module type for the merged module + */ +extern void +lto_codegen_set_merged_module_output_format(lto_code_gen_t cg, + lto_output_format format); + +/** + * Sets the SOName for the merged module + */ +extern void +lto_codegen_set_merged_module_soname(lto_code_gen_t cg, + const char *soname); + +/** + * Add a library dependency to the merged module + */ +extern void +lto_codegen_add_merged_module_library_dep(lto_code_gen_t cg, + const char *lib); + +/** + * Wrap a symbol in the merged module. + */ +extern void +lto_codegen_wrap_symbol_in_merged_module(lto_code_gen_t cg, + const char *sym); + + +/** + * Set version of a defined symbol in the merged module + */ +extern const char * +lto_codegen_set_symbol_def_version(lto_code_gen_t cg, + const char *sym, + const char *version, + bool is_default); + + +/** + * Set version of an undefined symbol in the merged module + */ +extern const char * +lto_codegen_set_symbol_needed(lto_code_gen_t cg, + const char *sym, + const char *version, + const char *dynfile); +/* @LOCALMOD-END */ /** * Writes a new object file at the specified path that contains the * merged contents of all modules added so far. diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index a92b85939f..02c5f422ce 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -94,6 +94,12 @@ namespace llvm { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize; + /// @LOCALMOD-BEGIN + /// Is the bitcode module a plain object? This is false + /// for shared (pso) and executable (pexe) files. + bool IsPlainObject; + /// @LOCALMOD-END + private: // GCMetadataPrinters - The garbage collection metadata printer table. void *GCMetadataPrinters; // Really a DenseMap. @@ -240,6 +246,18 @@ namespace llvm { // Targets can, or in the case of EmitInstruction, must implement these to // customize output. + // @LOCALMOD-START + /// UseReadOnlyJumpTables - true if JumpTableInfo must be in rodata. + virtual bool UseReadOnlyJumpTables() const { return false; } + /// GetTargetBasicBlockAlign - the target alignment for basic blocks. + virtual unsigned GetTargetBasicBlockAlign() const { return 0; } + /// GetTargetLabelAlign - Get optional alignment for TargetOpcode + /// labels E.g., EH_LABEL. + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const { + return 0; + } + // @LOCALMOD-END + /// EmitStartOfAsmFile - This virtual method can be overridden by targets /// that want to emit something at the start of their file. virtual void EmitStartOfAsmFile(Module &) {} @@ -254,7 +272,12 @@ namespace llvm { /// EmitFunctionBodyEnd - Targets can override this to emit stuff after /// the last basic block in the function. - virtual void EmitFunctionBodyEnd() {} + virtual void EmitFunctionBodyEnd() { + // @LOCALMOD-START + unsigned NextFunctionAlignment = GetTargetBasicBlockAlign(); + if (NextFunctionAlignment) EmitAlignment(NextFunctionAlignment); + // @LOCALMOD-END + } /// EmitInstruction - Targets should implement this to emit instructions. 
virtual void EmitInstruction(const MachineInstr *) { diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 436918b1eb..c1bc3aba5e 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -165,6 +165,7 @@ private: SmallVector<uint32_t, 16> UsedRegs; unsigned FirstByValReg; bool FirstByValRegValid; + bool HasByValInRegPosition; // @LOCALMOD -- ARM only: see comment below. protected: ParmContext CallOrPrologue; @@ -313,6 +314,19 @@ public: void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; } bool isFirstByValRegValid() const { return FirstByValRegValid; } + // @LOCALMOD-BEGIN + // We disabled the splitting of byval between registers and memory. + // This separate flag indicates that a byval existed. We cannot reuse + // isFirstByValRegValid() because that is already used by the broken + // mechanism of splitting between stack and regs. We should check + // again if this mechanism is still broken later, or try to fix that + // mechanism. + // NOTE: this is only for ARM, so should be refactored. + bool hasByValInRegPosition() const { return HasByValInRegPosition; } + void setHasByValInRegPosition() { HasByValInRegPosition = true; } + void clearHasByValInRegPosition() { HasByValInRegPosition = false; } + // @LOCALMOD-END + ParmContext getCallOrPrologue() const { return CallOrPrologue; } private: diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 5d0a3b4c70..47170e4e58 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -641,6 +641,19 @@ namespace ISD { /// is the chain and the second operand is the alloca pointer. LIFETIME_START, LIFETIME_END, + // @LOCALMOD-BEGIN + // NACL_* - Native Client instrinsics. + // NACL_READ_TP is a fast built-in version of NaCl's tls_get() IRT + // interface. + NACL_READ_TP, + // These correspond to functions in: + // native_client/src/untrusted/nacl/tls_params.h + NACL_TP_TLS_OFFSET, + NACL_TP_TDB_OFFSET, + // Expands to the target architecture enumeration value. + NACL_TARGET_ARCH, + // @LOCALMOD-END + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index 5a3fb4b1a3..dcb013e1f9 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_INTRINSICLOWERING_H #define LLVM_CODEGEN_INTRINSICLOWERING_H +#include "llvm/ADT/StringSet.h" // @LOCALMOD #include "llvm/Intrinsics.h" namespace llvm { @@ -26,12 +27,23 @@ namespace llvm { class IntrinsicLowering { const DataLayout& TD; - + static StringSet<> FuncNames; // @LOCALMOD + bool Warned; public: explicit IntrinsicLowering(const DataLayout &td) : TD(td), Warned(false) {} + /// @LOCALMOD-BEGIN + /// GetFuncNames - Get the names of all functions which may + /// be called by an intrinsic. + static const StringSet<> &GetFuncNames(); + + /// IsCalledByIntrinsic - Returns true if a function may be called + /// by an intrinsic. + static bool IsCalledByIntrinsic(const StringRef &FuncName); + /// @LOCALMOD-END + /// AddPrototypes - This method, if called, causes all of the prototypes /// that might be needed by an intrinsic lowering implementation to be /// inserted into the module specified. 
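The IntrinsicLowering queries added above (GetFuncNames and IsCalledByIntrinsic) only expose which external functions intrinsic lowering may end up calling, such as memcpy or memset. For context, here is a minimal hypothetical sketch, not part of this patch, of how a pass that hides module symbols could consult the query so those functions stay visible; the helper name safeToInternalize and the surrounding pass are assumptions.

#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/GlobalValue.h"

using namespace llvm;

// Hypothetical helper: true if GV can be given local linkage without
// breaking a later intrinsic lowering that may call it by name.
static bool safeToInternalize(const GlobalValue &GV) {
  if (IntrinsicLowering::IsCalledByIntrinsic(GV.getName()))
    return false;                  // e.g. memcpy, memset, ...
  return !GV.isDeclaration();      // only definitions can be internalized
}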
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h index 89f00e91f7..f95b8b6b84 100644 --- a/include/llvm/CodeGen/JITCodeEmitter.h +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -290,7 +290,7 @@ public: /// getCurrentPCOffset - Return the offset from the start of the emitted /// buffer that we are currently writing to. - uintptr_t getCurrentPCOffset() const { + virtual uintptr_t getCurrentPCOffset() const { // @LOCALMOD return CurBufferPtr-BufferBegin; } @@ -335,6 +335,13 @@ public: /// getLabelLocations - Return the label locations map of the label IDs to /// their address. virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { return 0; } + + // @LOCALMOD-START + virtual void beginBundleLock() {}; + virtual void endBundleLock() {}; + virtual void alignToBundleBeginning() {}; + virtual void alignToBundleEnd() {}; + // @LOCALMOD-END }; } // End llvm namespace diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h index 8414c64544..e1911cfd82 100644 --- a/include/llvm/CodeGen/LexicalScopes.h +++ b/include/llvm/CodeGen/LexicalScopes.h @@ -162,6 +162,12 @@ public: #ifndef NDEBUG IndentLevel = 0; #endif + // @LOCALMOD-BEGIN -- Hack for bug + // http://code.google.com/p/nativeclient/issues/detail?id=2786 + Desc.make_weak(); + InlinedAtLocation.make_weak(); + // @LOCALMOD-END + if (Parent) Parent->addChild(this); } diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 8ed215d75b..827a9f81e8 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -57,6 +57,17 @@ public: virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) = 0; + // @LOCALMOD-START + /// getJumpTableIndex - Check if this is a reference to a jump table. + /// If so, return a pointer to the jump table index value that is stored + /// in the constant pool, else return 0. + /// The default behavior is to indicate that the value is not a jump table + /// index. This is used by BranchFolder::runOnMachineFunction() and only in + /// conjunction with ARM targets + /// TODO: this should be cleaned up as it does tripple duty: tester, setter, getter + virtual unsigned *getJumpTableIndex() { return 0; } + // @LOCALMOD-END + /// print - Implement operator<< virtual void print(raw_ostream &O) const = 0; }; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 770685358a..01291e43c8 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -291,6 +291,21 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return BuildMI(BB, MII, DL, MCID); } +// @LOCALMOD-BEGIN +/// BuildMI - This version of the builder inserts the newly-built +/// instruction before the given position in the given MachineBasicBlock, +/// does NOT take a destination register, and does not add implicit operands. +/// +inline MachineInstrBuilder BuildMI_NoImp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + DebugLoc DL, + const MCInstrDesc &MCID) { + MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL, true); + BB.insert(I, MI); + return MachineInstrBuilder(MI); +} +// @LOCALMOD-END + /// BuildMI - This version of the builder inserts the newly-built /// instruction at the end of the given MachineBasicBlock, and does NOT take a /// destination register. 
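The BuildMI_NoImp helper above pairs naturally with the bundle pseudo-opcodes this patch introduces further down (BUNDLE_LOCK and BUNDLE_UNLOCK in TargetOpcodes.h and Target.td), since those pseudos take no operands at all. The following is a hypothetical sketch, not code from this patch, of bracketing a half-open instruction range so that later emission keeps it inside one bundle; the function name and the pass it would live in are assumptions.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"

using namespace llvm;

// Hypothetical helper: wrap the instructions in [First, Last) in a
// BUNDLE_LOCK / BUNDLE_UNLOCK pair.
static void lockRange(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator First,
                      MachineBasicBlock::iterator Last,
                      const TargetInstrInfo *TII) {
  DebugLoc DL = First->getDebugLoc();
  // The pseudos have no outs/ins, so the NoImp variant suffices.
  BuildMI_NoImp(MBB, First, DL, TII->get(TargetOpcode::BUNDLE_LOCK));
  BuildMI_NoImp(MBB, Last, DL, TII->get(TargetOpcode::BUNDLE_UNLOCK));
}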
diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h index 244b466e17..8d71930882 100644 --- a/include/llvm/CodeGen/MachineRelocation.h +++ b/include/llvm/CodeGen/MachineRelocation.h @@ -197,6 +197,14 @@ public: return Offset; } + // @LOCALMOD-START + /// setMachineCodeOffset() - Adjust the offset in the code buffer (this is + /// used when the instruction is moved after emission for bundle alignment) + void setMachineCodeOffset(intptr_t offset) { + Offset = offset; + } + // @LOCALMOD-END + /// getRelocationType - Return the target-specific relocation ID for this /// relocation. unsigned getRelocationType() const { diff --git a/include/llvm/ExecutionEngine/NaClJITMemoryManager.h b/include/llvm/ExecutionEngine/NaClJITMemoryManager.h new file mode 100644 index 0000000000..dcd06627df --- /dev/null +++ b/include/llvm/ExecutionEngine/NaClJITMemoryManager.h @@ -0,0 +1,237 @@ +//=-- NaClJITMemoryManager.h - Interface JIT uses to Allocate Mem -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#ifndef LLVM_EXECUTION_ENGINE_NACL_JIT_MEMMANAGER_H +#define LLVM_EXECUTION_ENGINE_NACL_JIT_MEMMANAGER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/Support/Allocator.h" + +namespace llvm { + +class Function; +class GlobalValue; + +struct SimpleSlab { + uint8_t *address; + size_t size; + uint8_t *next_free; +}; + +struct FreeListNode { + uint8_t *address; + uintptr_t size; + FreeListNode *Prev; + FreeListNode *Next; + FreeListNode *RemoveFromFreeList() { + assert(Next->Prev == this && Prev->Next == this && "Freelist broken!"); + Next->Prev = Prev; + return Prev->Next = Next; + } + void AddToFreeList(FreeListNode *FreeList) { + Next = FreeList; + Prev = FreeList->Prev; + Prev->Next = this; + Next->Prev = this; + } +}; + +class NaClJITMemoryManager : public JITMemoryManager { + // NaCl disallows writing into any code region, and disallows executing any + // data region. Thus we can never get any RWX memory and the the strategy + // used by the other allocators of colocation of allocation metadata + // with the allocated code won't work. + // Currently with NaCl we have one single pool of usable space between the + // text and rodata segments, defined by the linker + // so to support stub allocation in the middle of a function, we allocate + // them in slabs interspersed with the functions. + + static const size_t kStubSlabSize = 16 * 1024; + static const size_t kDataSlabSize = 16 * 1024; + static const size_t kCodeSlabSize = 64 * 1024; + + typedef DenseMap<uint8_t *, size_t> AllocationTable; + + uint8_t *AllocatableRegionStart; + uint8_t *AllocatableRegionLimit; + uint8_t *NextCode; + SimpleSlab CurrentStubSlab; + + // Allocation metadata must be kept separate from code, so the free list is + // allocated with new rather than being a header in the code blocks + FreeListNode *CodeFreeListHead; + FreeListNode *CurrentCodeBlock; + // Mapping from pointer to allocated function, to size of allocation + AllocationTable AllocatedFunctions; + + // Since Exception tables are allocated like functions (i.e. 
we don't know + // ahead of time how large they are) we use the same allocation method for + // simplicity even though it's not strictly necessary to separate the + // allocation metadata from the allocated data. + FreeListNode *DataFreeListHead; + FreeListNode *CurrentDataBlock; + AllocationTable AllocatedTables; + BumpPtrAllocator DataAllocator; + + uint8_t *GOTBase; // Target Specific reserved memory + + FreeListNode *allocateCodeSlab(size_t MinSize); + FreeListNode *allocateDataSlab(size_t MinSize); + SimpleSlab allocateStubSlab(size_t MinSize); + + // Functions for allocations using one of the free lists + void InitFreeList(FreeListNode **Head); + void DestroyFreeList(FreeListNode *Head); + FreeListNode *FreeListAllocate(uintptr_t &ActualSize, FreeListNode *Head, + FreeListNode * (NaClJITMemoryManager::*allocate)(size_t)); + void FreeListFinishAllocation(FreeListNode *Block, FreeListNode *Head, + uint8_t *AllocationStart, uint8_t *AllocationEnd, AllocationTable &table); + void FreeListDeallocate(FreeListNode *Head, AllocationTable &Table, + void *Body); + public: + // TODO(dschuff): how to find the real value? is it a flag? + static const int kBundleSize = 32; + static const intptr_t kJumpMask = -32; + NaClJITMemoryManager(); + virtual ~NaClJITMemoryManager(); + static inline bool classof(const JITMemoryManager*) { return true; } + + /// setMemoryWritable - No-op on NaCl - code is never writable + virtual void setMemoryWritable() {} + + /// setMemoryExecutable - No-op on NaCl - data is never executable + virtual void setMemoryExecutable() {} + + /// setPoisonMemory - No-op on NaCl - nothing unvalidated is ever executable + virtual void setPoisonMemory(bool poison) {} + + /// getPointerToNamedFunction - This method returns the address of the + /// specified function. As such it is only useful for resolving library + /// symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + virtual void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) ; + + //===--------------------------------------------------------------------===// + // Global Offset Table Management + //===--------------------------------------------------------------------===// + + /// AllocateGOT - If the current table requires a Global Offset Table, this + /// method is invoked to allocate it. This method is required to set HasGOT + /// to true. + virtual void AllocateGOT(); + + /// getGOTBase - If this is managing a Global Offset Table, this method should + /// return a pointer to its base. + virtual uint8_t *getGOTBase() const { + return GOTBase; + } + + //===--------------------------------------------------------------------===// + // Main Allocation Functions + //===--------------------------------------------------------------------===// + + /// startFunctionBody - When we start JITing a function, the JIT calls this + /// method to allocate a block of free RWX memory, which returns a pointer to + /// it. If the JIT wants to request a block of memory of at least a certain + /// size, it passes that value as ActualSize, and this method returns a block + /// with at least that much space. If the JIT doesn't know ahead of time how + /// much space it will need to emit the function, it passes 0 for the + /// ActualSize. 
In either case, this method is required to pass back the size + /// of the allocated block through ActualSize. The JIT will be careful to + /// not write more than the returned ActualSize bytes of memory. + virtual uint8_t *startFunctionBody(const Function *F, + uintptr_t &ActualSize); + + /// allocateStub - This method is called by the JIT to allocate space for a + /// function stub (used to handle limited branch displacements) while it is + /// JIT compiling a function. For example, if foo calls bar, and if bar + /// either needs to be lazily compiled or is a native function that exists too + /// far away from the call site to work, this method will be used to make a + /// thunk for it. The stub should be "close" to the current function body, + /// but should not be included in the 'actualsize' returned by + /// startFunctionBody. + virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment); + + /// endFunctionBody - This method is called when the JIT is done codegen'ing + /// the specified function. At this point we know the size of the JIT + /// compiled function. This passes in FunctionStart (which was returned by + /// the startFunctionBody method) and FunctionEnd which is a pointer to the + /// actual end of the function. This method should mark the space allocated + /// and remember where it is in case the client wants to deallocate it. + virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd); + + /// allocateCodeSection - Allocate a memory block of (at least) the given + /// size suitable for executable code. The SectionID is a unique identifier + /// assigned by the JIT and passed through to the memory manager for + /// the instance class to use if it needs to communicate to the JIT about + /// a given section after the fact. + virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID); + + /// allocateDataSection - Allocate a memory block of (at least) the given + /// size suitable for data. The SectionID is a unique identifier + /// assigned by the JIT and passed through to the memory manager for + /// the instance class to use if it needs to communicate to the JIT about + /// a given section after the fact. + virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID); + + /// allocateSpace - Allocate a memory block of the given size. This method + /// cannot be called between calls to startFunctionBody and endFunctionBody. + virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment); + + /// allocateGlobal - Allocate memory for a global. + virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment); + + /// deallocateFunctionBody - Free the specified function body. The argument + /// must be the return value from a call to startFunctionBody() that hasn't + /// been deallocated yet. This is never called when the JIT is currently + /// emitting a function. + virtual void deallocateFunctionBody(void *Body); + + /// startExceptionTable - When we finished JITing the function, if exception + /// handling is set, we emit the exception table. + virtual uint8_t* startExceptionTable(const Function* F, + uintptr_t &ActualSize); + + /// endExceptionTable - This method is called when the JIT is done emitting + /// the exception table. + virtual void endExceptionTable(const Function *F, uint8_t *TableStart, + uint8_t *TableEnd, uint8_t* FrameRegister); + + /// deallocateExceptionTable - Free the specified exception table's memory. 
+ /// The argument must be the return value from a call to startExceptionTable() + /// that hasn't been deallocated yet. This is never called when the JIT is + /// currently emitting an exception table. + virtual void deallocateExceptionTable(void *ET); + + virtual size_t GetDefaultCodeSlabSize() { + return kCodeSlabSize; + } + virtual size_t GetDefaultDataSlabSize() { + return kDataSlabSize; + } + virtual size_t GetDefaultStubSlabSize() { + return kStubSlabSize; + } + +}; + +} + +#endif // LLVM_EXECUTION_ENGINE_NACL_JIT_MEMMANAGER_H diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h index 7f7f74b1e2..aaab1922f5 100644 --- a/include/llvm/GlobalValue.h +++ b/include/llvm/GlobalValue.h @@ -76,6 +76,26 @@ public: removeDeadConstantUsers(); // remove any dead constants using this. } + // @LOCALMOD-BEGIN + /// Set the symbol version for this definition. + void setVersionDef(StringRef Version, bool IsDefault); + + /// Set the symbol version and dynamic source file (soname) + /// for this exterally provided global. + void setNeeded(StringRef Version, StringRef DynFile); + + /// Get the name of this symbol without the version suffix. + StringRef getUnversionedName() const; + + /// Get the version of this symbol. + /// Returns an empty string if the symbol is unversioned. + StringRef getVersion() const; + + /// Returns true if this is the default version of the symbol. + /// This may only be called if the symbol is versioned. + bool isDefaultVersion() const; + // @LOCALMOD-END + unsigned getAlignment() const { return (1u << Alignment) >> 1; } diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 8c164eb919..a6b7d31817 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -265,6 +265,8 @@ void initializeFinalizeMachineBundlesPass(PassRegistry&); void initializeLoopVectorizePass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); +void initializeExpandCtorsPass(PassRegistry&); // @LOCALMOD +void initializeNaClCcRewritePass(PassRegistry&); // @LOCALMOD } #endif diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 2e1597fe6f..42b9da6914 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -457,6 +457,36 @@ def int_convertus : Intrinsic<[llvm_anyint_ty], def int_convertuu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>; +// @LOCALMOD-BEGIN +//===----------------------- Native Client Intrinsics ---------------------===// +// TODO(sehr): conditionalize this on IsNaCl64 | IsNaCl32 | IsNaClArm. +// The expansions of these are in lib/Target/X86/X86InstrNacl.{td, cpp} and +// lib/Target/ARM/ARMInstrInfo.td. +def int_nacl_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_ptr_ty]>, + GCCBuiltin<"__builtin_nacl_setjmp">; +def int_nacl_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_longjmp">; + +// Fast built-in version of NaCl's tls_get() IRT interface. +def int_nacl_read_tp : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>, + GCCBuiltin<"__builtin_nacl_read_tp">; + +// The following intrinsics provide target-specific implementations of +// the interface in native_client/src/untrusted/nacl/tls_params.h. +// The intrinsic names are basically the functions there without the +// leading underscores. 
+def int_nacl_tp_tls_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_tp_tls_offset">; +def int_nacl_tp_tdb_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_tp_tdb_offset">; + +// The following intrinsic provides a target-specific constant value to +// indicate the target platform compiled to. The enum values are enumerated +// pnaclintrin.h. +def int_nacl_target_arch : Intrinsic<[llvm_i32_ty], []>, + GCCBuiltin<"__builtin_nacl_target_arch">; +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index 72ed1a317c..f5ca3d5471 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -25,6 +25,7 @@ class MCInst; class MCInstFragment; class MCObjectWriter; class MCSection; +class MCStreamer; class MCValue; class raw_ostream; @@ -157,6 +158,23 @@ public: /// handleAssemblerFlag - Handle any target-specific assembler flags. /// By default, do nothing. virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {} + + // @LOCALMOD-BEGIN + /// getBundleSize - Return the size (in bytes) of code bundling units + /// for this target. If 0, bundling is disabled. This is used exclusively + /// for Native Client. + virtual unsigned getBundleSize() const { + return 0; + } + + /// CustomExpandInst - + /// If the MCInst instruction has a custom expansion, write it to the + /// MCStreamer 'Out'. This can be used to perform "last minute" rewrites of + /// MCInst instructions for emission. + virtual bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + return false; + } + // @LOCALMOD-END }; } // End llvm namespace diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 97aad71fd9..29ec1020c3 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -48,6 +48,14 @@ namespace llvm { /// Default is 4. unsigned PointerSize; + /// @LOCALMOD-BEGIN + /// TODO(pdox): Before upstreaming this, make sure every target backend + /// sets it correctly. + /// StackSlotSize - Stack slot size in bytes. + /// Default is 4. + unsigned StackSlotSize; + /// @LOCALMOD-END + /// IsLittleEndian - True if target is little endian. /// Default is true. bool IsLittleEndian; @@ -340,6 +348,13 @@ namespace llvm { return PointerSize; } + /// @LOCALMOD-BEGIN + /// getStackSlotSize - Get the stack slot size in bytes. + unsigned getStackSlotSize() const { + return StackSlotSize; + } + /// @LOCALMOD-END + /// islittleendian - True if the target is little endian. bool isLittleEndian() const { return IsLittleEndian; diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h index cf79216d07..fdded4ffa7 100644 --- a/include/llvm/MC/MCAsmLayout.h +++ b/include/llvm/MC/MCAsmLayout.h @@ -80,6 +80,11 @@ public: /// \brief Get the offset of the given fragment inside its containing section. uint64_t getFragmentOffset(const MCFragment *F) const; + // @LOCALMOD-BEGIN + /// \brief Get the bundle padding of the given fragment. 
+ uint8_t getFragmentPadding(const MCFragment *F) const; + // @LOCALMOD-END + /// @} /// @name Utility Functions /// @{ diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 5771415c81..f67aa9b966 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -52,11 +52,39 @@ public: FT_Org, FT_Dwarf, FT_DwarfFrame, - FT_LEB + FT_LEB, + FT_Tiny // @LOCALMOD }; + // @LOCALMOD-BEGIN + enum BundleAlignType { + BundleAlignNone = 0, + BundleAlignStart = 1, + BundleAlignEnd = 2 + }; + // @LOCALMOD-END + private: - FragmentType Kind; + // @LOCALMOD-BEGIN + // Try to compress the layout of MCFragment by: + // 1) Making Kind, the bundling flags, and BundlePadding fit in 32 bits. + // 2) Move LayoutOrder to fit in the hole left by aligning for 64 bits. + + FragmentType Kind : 4; + + BundleAlignType BundleAlign : 2; + bool BundleGroupStart : 1; + bool BundleGroupEnd : 1; + + /// BundlePadding - The computed padding for this fragment. This is ~0 + /// until initialized. + uint8_t BundlePadding; + + /// LayoutOrder - The layout order of this fragment. + unsigned LayoutOrder; + + // @LOCALMOD-END + /// Parent - The data for the section this fragment is in. MCSectionData *Parent; @@ -75,9 +103,6 @@ private: /// initialized. uint64_t Offset; - /// LayoutOrder - The layout order of this fragment. - unsigned LayoutOrder; - /// @} protected: @@ -99,12 +124,44 @@ public: unsigned getLayoutOrder() const { return LayoutOrder; } void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } + // @LOCALMOD-BEGIN + bool isBundleGroupStart() const { return BundleGroupStart; } + void setBundleGroupStart(bool Value) { BundleGroupStart = Value; } + + bool isBundleGroupEnd() const { return BundleGroupEnd; } + void setBundleGroupEnd(bool Value) { BundleGroupEnd = Value; } + + BundleAlignType getBundleAlign() const { return BundleAlign; } + void setBundleAlign(BundleAlignType Value) { BundleAlign = Value; } + // @LOCALMOD-END + void dump(); }; +// @LOCALMOD-BEGIN +// This is just a tiny data fragment with no fixups. +// (To help with memory usage) +class MCTinyFragment : public MCFragment { + private: + SmallString<6> Contents; + + public: + + MCTinyFragment(MCSectionData *SD = 0) : MCFragment(FT_Tiny, SD) {} + + SmallString<6> &getContents() { return Contents; } + const SmallString<6> &getContents() const { return Contents; } + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Tiny; + } + static bool classof(const MCTinyFragment *) { return true; } +}; +// @LOCALMOD-END + class MCDataFragment : public MCFragment { virtual void anchor(); - SmallString<32> Contents; + SmallString<6> Contents; // @LOCALMOD: Memory efficiency /// Fixups - The list of fixups in this fragment. std::vector<MCFixup> Fixups; @@ -119,8 +176,8 @@ public: /// @name Accessors /// @{ - SmallString<32> &getContents() { return Contents; } - const SmallString<32> &getContents() const { return Contents; } + SmallString<6> &getContents() { return Contents; } // @LOCALMOD + const SmallString<6> &getContents() const { return Contents; } // @LOCALMOD /// @} /// @name Fixup Access @@ -464,6 +521,29 @@ private: /// it. unsigned HasInstructions : 1; + // @LOCALMOD-BEGIN + bool BundlingEnabled : 1; + bool BundleLocked : 1; + + // Because ".bundle_lock" occurs before the fragment it applies to exists, + // we need to keep this flag around so we know to mark the next fragment + // as the start of a bundle group. 
A similar flag is not necessary for the + // last fragment, because when a .bundle_unlock occurs, the last fragment + // in the group already exists and can be marked directly. + bool BundleGroupFirstFrag : 1; + + typedef MCFragment::BundleAlignType BundleAlignType; + BundleAlignType BundleAlignNext : 2; + + // Optimization to reduce the number of fragments generated (for memory + // savings). Keep track of when we know the offset of the next point to + // emit an instruction. If we know the offset from a known alignment point, + // we can just append to the previous fragment. + bool BundleOffsetKnown : 1; + unsigned BundleSize; + unsigned BundleOffset; + // @LOCALMOD-END + /// @} public: @@ -485,6 +565,25 @@ public: unsigned getLayoutOrder() const { return LayoutOrder; } void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } + // @LOCALMOD-BEGIN + bool isBundlingEnabled() const { return BundlingEnabled; } + + bool isBundleLocked() const { return BundleLocked; } + void setBundleLocked(bool Value) { BundleLocked = Value; } + + bool isBundleGroupFirstFrag() const { return BundleGroupFirstFrag; } + void setBundleGroupFirstFrag(bool Value) { BundleGroupFirstFrag = Value; } + + + BundleAlignType getBundleAlignNext() const { return BundleAlignNext; } + void setBundleAlignNext(BundleAlignType Value) { BundleAlignNext = Value; } + + void MarkBundleOffsetUnknown(); + bool ShouldCreateNewFragment(size_t Size); + void UpdateBundleOffset(size_t Size); + void AlignBundleOffsetTo(size_t AlignBase); + // @LOCALMOD-END + /// @name Fragment Access /// @{ @@ -743,6 +842,13 @@ private: bool fragmentNeedsRelaxation(const MCInstFragment *IF, const MCAsmLayout &Layout) const; + // @LOCALMOD-BEGIN + uint8_t ComputeBundlePadding(const MCAsmLayout &Layout, + MCFragment *F, + uint64_t FragmentOffset) const; + // @LOCALMOD-END + + /// layoutOnce - Perform one layout iteration and return true if any offsets /// were adjusted. bool layoutOnce(MCAsmLayout &Layout); @@ -809,6 +915,12 @@ public: MCAsmBackend &getBackend() const { return Backend; } + // @LOCALMOD-BEGIN + uint64_t getBundleSize() const; + uint64_t getBundleMask() const; + // @LOCALMOD-END + + MCCodeEmitter &getEmitter() const { return Emitter; } MCObjectWriter &getWriter() const { return Writer; } diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 38cdc7293b..8b0f191792 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -69,6 +69,12 @@ public: return ELF::ELFOSABI_FREEBSD; case Triple::Linux: return ELF::ELFOSABI_LINUX; + // @LOCALMOD-BEGIN + // This shouldn't be needed anymore (sel_ldr doesn't check for it), + // but removing it may require some changes in binutils also. 
+ case Triple::NativeClient: + return ELF::ELFOSABI_NACL; + // @LOCALMOD-END default: return ELF::ELFOSABI_NONE; } diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index 08b00f1c47..d9f72b7f42 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -68,6 +68,14 @@ public: unsigned AddrSpace); virtual void EmitULEB128Value(const MCExpr *Value); virtual void EmitSLEB128Value(const MCExpr *Value); + + // @LOCALMOD-BEGIN + void EmitBundleLock(); + void EmitBundleUnlock(); + void EmitBundleAlignStart(); + void EmitBundleAlignEnd(); + // @LOCALMOD-END + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); virtual void ChangeSection(const MCSection *Section); virtual void EmitInstruction(const MCInst &Inst); diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 230d27ef2e..40f83bf5d5 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -467,6 +467,27 @@ namespace llvm { /// @} + // @LOCALMOD-BEGIN + /// @name Bundling Directives + /// @{ + + /// EmitBundleLock - Begin a group of instructions which cannot + /// cross a bundle boundary. + virtual void EmitBundleLock() = 0; + + /// EmitBundleUnlock - End a bundle-locked group of instructions. + virtual void EmitBundleUnlock() = 0; + + /// EmitBundleAlignStart - Guarantee that the next instruction or + /// bundle-locked group starts at the beginning of a bundle. + virtual void EmitBundleAlignStart() = 0; + + /// EmitBundleAlignEnd - Guarantee that the next instruction or + /// bundle-locked group finishes at the end of a bundle. + virtual void EmitBundleAlignEnd() = 0; + /// @} + // @LOCALMOD-END + /// EmitFileDirective - Switch to a new logical file. This is used to /// implement the '.file "foo.c"' assembler directive. virtual void EmitFileDirective(StringRef Filename) = 0; diff --git a/include/llvm/Module.h b/include/llvm/Module.h index e6303ac775..13b56433dc 100644 --- a/include/llvm/Module.h +++ b/include/llvm/Module.h @@ -186,6 +186,22 @@ public: : Behavior(B), Key(K), Val(V) {} }; + /// @LOCALMOD-BEGIN + /// An enumeration for describing the module format + enum OutputFormat { + ObjectOutputFormat, + SharedOutputFormat, + ExecutableOutputFormat + }; + + /// A structure describing the symbols needed from an external file. + struct NeededRecord { + std::string DynFile; // Source file (soname) + std::vector<std::string> Symbols; // List of symbol names + // (with version suffix) + }; + /// @LOCALMOD-END + /// @} /// @name Member Variables /// @{ @@ -203,6 +219,9 @@ private: std::string ModuleID; ///< Human readable identifier for the module std::string TargetTriple; ///< Platform target triple Module compiled on std::string DataLayout; ///< Target data description + // @LOCALMOD-BEGIN + mutable std::string ModuleSOName; ///< Module SOName (for shared format) + // @LOCALMOD-END void *NamedMDSymTab; ///< NamedMDNode names. friend class Constant; @@ -234,6 +253,24 @@ public: /// @returns a string containing the target triple. const std::string &getTargetTriple() const { return TargetTriple; } + // @LOCALMOD-BEGIN + + /// Get the module format + /// @returns the module format + OutputFormat getOutputFormat() const; + + /// Get the SOName of this module. + /// @returns a string containing the module soname + const std::string &getSOName() const; + + /// Record the needed information for a global value. + /// This creates a needed record for DynFile, if one does not already exist. 
+ void addNeededRecord(StringRef DynFile, GlobalValue *GV); + + // Fill NeededOut with all needed records present in the module. + void getNeededRecords(std::vector<NeededRecord> *NeededOut) const; + // @LOCALMOD-END + /// Get the target endian information. /// @returns Endianess - an enumeration for the endianess of the target Endianness getEndianness() const; @@ -263,6 +300,18 @@ public: /// Set the target triple. void setTargetTriple(StringRef T) { TargetTriple = T; } + /// @LOCALMOD-BEGIN + + /// Set the module format + void setOutputFormat(OutputFormat F); + + /// For modules with output format "shared", set the output soname. + void setSOName(StringRef Name); + + /// Wrap a global symbol. + void wrapSymbol(StringRef SymName); + /// @LOCALMOD-END + /// Set the module-scope inline assembly blocks. void setModuleInlineAsm(StringRef Asm) { GlobalScopeAsm = Asm; @@ -584,6 +633,11 @@ public: /// Dump the module to stderr (for debugging). void dump() const; + /// @LOCALMOD-BEGIN + /// Print the PNaCl metadata for the module. + void dumpMeta(raw_ostream &OS) const; + /// @LOCALMOD-END + /// This function causes all the subinstructions to "let go" of all references /// that they are maintaining. This allows one to 'delete' a whole class at /// a time, even though there may be circular references... first all diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index b676e91eba..a67a6ac09e 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -325,6 +325,7 @@ enum { ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 ELFOSABI_ARM = 97, // ARM + ELFOSABI_NACL = 123, // Native Client // @LOCALMOD ELFOSABI_STANDALONE = 255 // Standalone (embedded) application }; diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h index dbcf0fd11d..5e98fbd07a 100644 --- a/include/llvm/Support/ValueHandle.h +++ b/include/llvm/Support/ValueHandle.h @@ -104,6 +104,11 @@ protected: void setValPtrInt(unsigned K) { VP.setInt(K); } unsigned getValPtrInt() const { return VP.getInt(); } + // @LOCALMOD-BEGIN -- Hack for bug: + // http://code.google.com/p/nativeclient/issues/detail?id=2786 + void setKind(HandleBaseKind K) { PrevPair.setInt(K); } + // @LOCALMOD-END + static bool isValid(Value *V) { return V && V != DenseMapInfo<Value *>::getEmptyKey() && @@ -232,6 +237,15 @@ public: return getValPtr(); } + // @LOCALMOD-BEGIN -- Hack for bug: + // http://code.google.com/p/nativeclient/issues/detail?id=2786 + // This allows us to weaken the Asserting Value Handle in LexicalScopes.h, + // for Debug info only. + void make_weak() { + setKind(Weak); + } + // @LOCALMOD-END + ValueTy *operator->() const { return getValPtr(); } ValueTy &operator*() const { return *getValPtr(); } }; diff --git a/include/llvm/Support/support_macros.h b/include/llvm/Support/support_macros.h new file mode 100644 index 0000000000..83d62c722c --- /dev/null +++ b/include/llvm/Support/support_macros.h @@ -0,0 +1,25 @@ +// Define support macros for defining classes, etc. + +#ifndef LLVM_SUPPORT_SUPPORT_MACROS_H__ +#define LLVM_SUPPORT_SUPPORT_MACROS_H__ + +// Define macro, to use within a class declaration, to disallow constructor +// copy. Defines copy constructor declaration under the assumption that it +// is never defined. +#define DISALLOW_CLASS_COPY(class_name) \ + class_name(class_name& arg) // Do not implement + +// Define macro, to use within a class declaration, to disallow assignment. 
+// Defines assignment operation declaration under the assumption that it +// is never defined. +#define DISALLOW_CLASS_ASSIGN(class_name) \ + void operator=(class_name& arg) // Do not implement + +// Define macro to add copy and assignment declarations to a class file, +// for which no bodies will be defined, effectively disallowing these from +// being defined in the class. +#define DISALLOW_CLASS_COPY_AND_ASSIGN(class_name) \ + DISALLOW_CLASS_COPY(class_name); \ + DISALLOW_CLASS_ASSIGN(class_name) + +#endif // LLVM_SUPPORT_SUPPORT_MACROS_H__ diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h index 0d164f688d..844013ed5d 100644 --- a/include/llvm/Support/system_error.h +++ b/include/llvm/Support/system_error.h @@ -597,7 +597,7 @@ enum _ { #else stream_timeout = ETIMEDOUT, #endif - text_file_busy = ETXTBSY, + text_file_busy = EINVAL, // @LOCALMOD timed_out = ETIMEDOUT, too_many_files_open_in_system = ENFILE, too_many_files_open = EMFILE, diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 12f5c0eb30..5fb12f503e 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -766,6 +766,40 @@ def LIFETIME_END : Instruction { let AsmString = "LIFETIME_END"; let neverHasSideEffects = 1; } +// @LOCALMOD-BEGIN +def BUNDLE_ALIGN_START : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +def BUNDLE_ALIGN_END : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +def BUNDLE_LOCK : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +def BUNDLE_UNLOCK : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +// @LOCALMOD-END } //===----------------------------------------------------------------------===// diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h index d56db7b511..7df3bfa473 100644 --- a/include/llvm/Target/TargetFrameLowering.h +++ b/include/llvm/Target/TargetFrameLowering.h @@ -48,11 +48,19 @@ private: unsigned StackAlignment; unsigned TransientStackAlignment; int LocalAreaOffset; + + // @LOCALMOD-BEGIN + // TODO(pdox): Refactor this and upstream it, to get rid of the + // assumption that StackSlotSize == PointerSize. 
+ unsigned StackSlotSize; + // @LOCALMOD-END public: - TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO, - unsigned TransAl = 1) + TargetFrameLowering(StackDirection D, + unsigned StackAl, int LAO, + unsigned TransAl = 1, + unsigned SlotSize = 0) // @LOCALMOD : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl), - LocalAreaOffset(LAO) {} + LocalAreaOffset(LAO), StackSlotSize(SlotSize) {} virtual ~TargetFrameLowering(); @@ -63,6 +71,11 @@ public: /// StackDirection getStackGrowthDirection() const { return StackDir; } + // @LOCALMOD-BEGIN + /// getStackSlotSize - Return the size of a stack slot + unsigned getStackSlotSize() const { return StackSlotSize; } + // @LOCALMOD-END + /// getStackAlignment - This method returns the number of bytes to which the /// stack pointer must be aligned on entry to a function. Typically, this /// is the largest alignment for any data object in the target. diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h index 044afd9b73..c2bb376131 100644 --- a/include/llvm/Target/TargetJITInfo.h +++ b/include/llvm/Target/TargetJITInfo.h @@ -129,6 +129,25 @@ namespace llvm { /// separately allocated heap memory rather than in the same /// code memory allocated by JITCodeEmitter. virtual bool allocateSeparateGVMemory() const { return false; } + + // @LOCALMOD-START + // NaCl-specific, target-specific stuff + typedef struct { uint8_t *ins; int len; } HaltInstruction; + /// Get a sequence of NOPs of length len. Returns a pointer to a buffer + /// containing a val + virtual const uint8_t *getNopSequence(size_t len) const { return NULL; } + /// Get the length and definition of the halt/roadblock instruction + virtual const HaltInstruction *getHalt() const { return NULL; } + virtual int getBundleSize() const { return 0; } + virtual int32_t getJumpMask() const { return 0; } + + /// Relocations cannot happen in-place in NaCl because we can't write to + /// code. This function takes a pointer to where the code has been emitted, + /// before it is copied to the code region. The subsequent call to + /// relocate takes pointers to the target code location, but rewrites the + /// code in the relocation buffer rather than at the target + virtual void setRelocationBuffer(unsigned char * BufferBegin) {} + // @LOCALMOD-END protected: bool useGOT; }; diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 580a30fcd2..f8925f25a1 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -121,6 +121,18 @@ public: // mask (ex: x86 blends). }; + // @LOCALMOD-START + // This needs to be kept in sync with + // native_client/src/untrusted/nacl/pnaclintrin.h. + enum PnaclTargetArchitecture { + PnaclTargetArchitectureInvalid = 0, + PnaclTargetArchitectureX86_32, + PnaclTargetArchitectureX86_64, + PnaclTargetArchitectureARM_32, + PnaclTargetArchitectureARM_32_Thumb + }; + // @LOCALMOD-END + static ISD::NodeType getExtendForContent(BooleanContent Content) { switch (Content) { case UndefinedBooleanContent: diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h index 516e0706b8..2c9459974a 100644 --- a/include/llvm/Target/TargetOpcodes.h +++ b/include/llvm/Target/TargetOpcodes.h @@ -91,7 +91,14 @@ namespace TargetOpcode { /// Lifetime markers. 
LIFETIME_START = 15, - LIFETIME_END = 16 + LIFETIME_END = 16, + + // @LOCALMOD-BEGIN + BUNDLE_ALIGN_START = 14, + BUNDLE_ALIGN_END = 15, + BUNDLE_LOCK = 16, + BUNDLE_UNLOCK = 17 + // @LOCALMOD-END }; } // end namespace TargetOpcode } // end namespace llvm diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 68ca567836..0a1b73e352 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -30,6 +30,12 @@ namespace llvm { }; } + // @LOCALMOD-BEGIN + /// TLSUseCall - This flag enables the use of a function call to get the + /// thread pointer for TLS accesses, instead of using inline code. + extern bool TLSUseCall; + // @LOCALMOD-END + namespace FPOpFusion { enum FPOpFusionMode { Fast, // Enable fusion of FP ops wherever it's profitable. diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h new file mode 100644 index 0000000000..fe29463a8b --- /dev/null +++ b/include/llvm/Transforms/NaCl.h @@ -0,0 +1,21 @@ +//===-- NaCl.h - NaCl Transformations ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_NACL_H +#define LLVM_TRANSFORMS_NACL_H + +namespace llvm { + +class ModulePass; + +ModulePass *createExpandCtorsPass(); + +} + +#endif diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index a5d8eed746..1ddca844c9 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -372,7 +372,7 @@ extern char &InstructionSimplifierID; // "block_weights" metadata. FunctionPass *createLowerExpectIntrinsicPass(); - +FunctionPass *createNaClCcRewritePass(const TargetLowering *TLI = 0); } // End llvm namespace #endif diff --git a/include/llvm/Value.h b/include/llvm/Value.h index 5b19435eba..be218183e5 100644 --- a/include/llvm/Value.h +++ b/include/llvm/Value.h @@ -105,6 +105,12 @@ public: /// Type *getType() const { return VTy; } + // @LOCALMOD-START + // Currently only used for function type update during + // the NaCl calling convention rewrite pass + void setType(Type* t) { VTy = t; } + // @LOCALMOD-END + /// All values hold a context through their type. LLVMContext &getContext() const; diff --git a/include/llvm/Wrap/BCHeaderField.h b/include/llvm/Wrap/BCHeaderField.h new file mode 100644 index 0000000000..40a3714c9f --- /dev/null +++ b/include/llvm/Wrap/BCHeaderField.h @@ -0,0 +1,106 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +#ifndef LLVM_WRAP_BCHEADERFIELD_H +#define LLVM_WRAP_BCHEADERFIELD_H +#include <limits> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +// Class representing a variable-size metadata field in the bitcode header. +// Also contains the list of known Tag IDs. +// Contains a pointer to the data but does not own the data, so it can be +// copied with the trivial copy constructor/assignment operator. 
+ +// The serialized format has 2 fixed subfields (ID and length) and the +// variable-length data subfield +class BCHeaderField { + public: + typedef enum { + kInvalid = 0, + kBitcodeHash = 1, + kAndroidCompilerVersion = 0x4001, + kAndroidOptimizationLevel = 0x4002 + } Tag; + typedef uint16_t FixedSubfield; + + BCHeaderField(Tag ID, size_t len, uint8_t* data) : + ID_(ID), len_(len), data_(data) {} + size_t GetTotalSize() { + // Round up to 4 byte alignment + return (kTagLenSize + len_ + 3) & ~3; + } + + bool Write(uint8_t* buf, size_t buf_len) { + size_t fields_len = kTagLenSize + len_; + size_t pad_len = (4 - (fields_len & 3)) & 3; + // Ensure buffer is large enough and that length can be represented + // in 16 bits + if (buf_len < fields_len + pad_len || + len_ > std::numeric_limits<FixedSubfield>::max()) return false; + + WriteFixedSubfield(static_cast<FixedSubfield>(ID_), buf); + WriteFixedSubfield(static_cast<FixedSubfield>(len_), + buf + sizeof(FixedSubfield)); + memcpy(buf + kTagLenSize, data_, len_); + // Pad out to 4 byte alignment + if (pad_len) { + memset(buf + fields_len, 0, pad_len); + } + return true; + } + + bool Read(const uint8_t* buf, size_t buf_len) { + if (buf_len < kTagLenSize) return false; + FixedSubfield field; + ReadFixedSubfield(&field, buf); + ID_ = static_cast<Tag>(field); + ReadFixedSubfield(&field, buf + sizeof(FixedSubfield)); + len_ = static_cast<size_t>(field); + if (buf_len < kTagLenSize + len_) return false; + memcpy(data_, buf + kTagLenSize, len_); + return true; + } + + void Print() { + fprintf(stderr, "Field ID: %d, data length %d, total length %d\n", + ID_, static_cast<int>(len_), static_cast<int>(GetTotalSize())); + fprintf(stderr, "Data: "); + for (size_t i = 0; i < len_; i++) fprintf(stderr, "%02x", data_[i]); + fprintf(stderr, "\n"); + } + + // Get the data size from a serialized field to allow allocation + static size_t GetDataSizeFromSerialized(const uint8_t* buf) { + FixedSubfield len; + ReadFixedSubfield(&len, buf + sizeof(FixedSubfield)); + return len; + } + + Tag getID() const { + return ID_; + } + + size_t getLen() const { + return len_; + } + + private: + // Combined size of the fixed subfields + const static size_t kTagLenSize = 2 * sizeof(FixedSubfield); + static void WriteFixedSubfield(FixedSubfield value, uint8_t* buf) { + buf[0] = value & 0xFF; + buf[1] = (value >> 8) & 0xFF; + } + static void ReadFixedSubfield(FixedSubfield* value, const uint8_t* buf) { + *value = buf[0] | buf[1] << 8; + } + Tag ID_; + size_t len_; + uint8_t *data_; +}; + +#endif diff --git a/include/llvm/Wrap/bitcode_wrapperer.h b/include/llvm/Wrap/bitcode_wrapperer.h new file mode 100644 index 0000000000..89f2a4cbcc --- /dev/null +++ b/include/llvm/Wrap/bitcode_wrapperer.h @@ -0,0 +1,192 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Define utility class to wrap/unwrap bitcode files. Does wrapping/unwrapping +// in such a way that the wrappered bitcode file is still a bitcode file. 
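A minimal usage sketch for the BCHeaderField class defined above; the field choice, payload, and buffer size are illustrative only:

    uint8_t hash[4] = {0xDE, 0xAD, 0xBE, 0xEF};
    BCHeaderField field(BCHeaderField::kBitcodeHash, sizeof(hash), hash);
    uint8_t buf[8];                           // GetTotalSize() == 8 for a 4-byte payload
    bool ok = field.Write(buf, sizeof(buf));  // emits ID, length, data, then zero padding

Read() performs the inverse, and GetDataSizeFromSerialized() lets a reader size the destination buffer before calling Read().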
+ +#ifndef LLVM_WRAP_BITCODE_WRAPPERER_H__ +#define LLVM_WRAP_BITCODE_WRAPPERER_H__ + +#include <stdint.h> +#include <stddef.h> +#include <vector> + +#include "llvm/Support/support_macros.h" +#include "llvm/Wrap/BCHeaderField.h" +#include "llvm/Wrap/wrapper_input.h" +#include "llvm/Wrap/wrapper_output.h" + +// The bitcode wrapper header is the following 7 fixed 4-byte fields: +// 1) 0B17C0DE - The magic number expected by llvm for wrapped bitcodes +// 2) Version # 0 - The current version of wrapped bitcode files +// 3) (raw) bitcode offset +// 4) (raw) bitcode size +// 5) Android header version +// 6) Android target API +// 7) PNaCl Bitcode version +// plus 0 or more variable-length fields (consisting of ID, length, data) + +// Initial buffer size. It is expanded if needed to hold large variable-size +// fields. +static const size_t kBitcodeWrappererBufferSize = 1024; + +// Support class for outputting a wrapped bitcode file from a raw bitcode +// file (and optionally additional header fields), or for outputting a raw +// bitcode file from a wrapped one. +class BitcodeWrapperer { + public: + // Create a bitcode wrapperer using the following + // input and output files. + BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile); + + // Returns true if the input file begins with a bitcode + // wrapper magic number. As a side effect, _wrapper_ fields are set. + bool IsInputBitcodeWrapper(); + + // Returns true if the input file begins with a bitcode + // file magic number. + bool IsInputBitcodeFile(); + + // Add a variable-length field to the header. The caller is responsible + // for freeing the data pointed to by the BCHeaderField. + void AddHeaderField(BCHeaderField* field); + + // Generate a wrapped bitcode file from the input bitcode file + // and the current header data. Return true on success. + bool GenerateWrappedBitcodeFile(); + + // Unwrap the wrapped bitcode file, to the corresponding + // outfile. Return true on success. + bool GenerateRawBitcodeFile(); + + // Print current wrapper header fields to stderr for debugging. + void PrintWrapperHeader(); + + ~BitcodeWrapperer(); + + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(BitcodeWrapperer); + + // Refills the buffer with more bytes. Does this in a way + // such that it is maximally filled. + void FillBuffer(); + + // Returns the number of bytes in infile. + off_t GetInFileSize() { + if (infile_ != NULL) { + return infile_->Size(); + } else { + return 0; + } + } + + // Returns the offset of bitcode (i.e. the size of the wrapper header) + // if the output file were to be written now. + size_t BitcodeOffset(); + + // Returns true if we can read a word. If necessary, fills the buffer + // with enough characters so that there are at least a 32-bit value + // in the buffer. Returns false if there isn't a 32-bit value + // to read from the input file. + bool CanReadWord(); + + // Read a (32-bit) word from the input. Return true + // if able to read the word. + bool ReadWord(uint32_t& word); + + // Write a (32-bit) word to the output. Return true if successful + bool WriteWord(uint32_t word); + + // Write all variable-sized header fields to the output. Return true + // if successful. + bool WriteVariableFields(); + + // Parse the bitcode wrapper header in the infile, if any. Return true + // if successful. + bool ParseWrapperHeader(); + + // Returns the i-th character in front of the cursor in the buffer. + uint8_t BufferLookahead(int i) { return buffer_[cursor_ + i]; } + + // Returns how many unread bytes are in the buffer. 
+ size_t GetBufferUnreadBytes() { return buffer_size_ - cursor_; } + + + // Backs up the read cursor to the beginning of the input buffer. + void ResetCursor() { + cursor_ = 0; + } + + // Generates the header sequence for the wrapped bitcode being + // generated. + bool WriteBitcodeWrapperHeader(); + + // Copies size bytes of infile to outfile, using the buffer. + bool BufferCopyInToOut(uint32_t size); + + // Discards the old infile and replaces it with the given file. + void ReplaceInFile(WrapperInput* new_infile); + + // Discards the old outfile and replaces it with the given file. + void ReplaceOutFile(WrapperOutput* new_outfile); + + // Moves to the given position in the input file. Returns false + // if unsuccessful. + bool Seek(uint32_t pos); + + // Clear the buffer of all contents. + void ClearBuffer(); + + // The input file being processed. Can be either + // a bitcode file, a wrappered bitcode file, or a secondary + // file to be wrapped. + WrapperInput* infile_; + + // The output file being generated. Can be either + // a bitcode file, a wrappered bitcode file, or a secondary + // unwrapped file. + WrapperOutput* outfile_; + + // A buffer of bytes read from the input file. + std::vector<uint8_t> buffer_; + + // The number of bytes that were read from the input file + // into the buffer. + size_t buffer_size_; + + // The index to the current read point within the buffer. + size_t cursor_; + + // True when eof of input is reached. + bool infile_at_eof_; + + // The 32-bit value defining the offset of the raw bitcode in the input file. + uint32_t infile_bc_offset_; + + // The 32-bit value defining the generated offset of the wrapped bitcode. + // This value changes as new fields are added with AddHeaderField + uint32_t wrapper_bc_offset_; + + // The 32-bit value defining the size of the raw wrapped bitcode. + uint32_t wrapper_bc_size_; + + // Android header version and target API + uint32_t android_header_version_; + uint32_t android_target_api_; + + // PNaCl bitcode version + uint32_t pnacl_bc_version_; + + // Vector of variable header fields + std::vector<BCHeaderField> header_fields_; + // If any bufferdata from header fields is owned, it is stored here and + // freed on destruction. + std::vector<uint8_t*> variable_field_data_; + + // True if there was an error condition (e.g. the file is not bitcode) + bool error_; +}; + +#endif // LLVM_WRAP_BITCODE_WRAPPERER_H__ diff --git a/include/llvm/Wrap/file_wrapper_input.h b/include/llvm/Wrap/file_wrapper_input.h new file mode 100644 index 0000000000..9f3de004c4 --- /dev/null +++ b/include/llvm/Wrap/file_wrapper_input.h @@ -0,0 +1,48 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Defines utility allowing files for bitcode input wrapping. + +#ifndef FILE_WRAPPER_INPUT_H__ +#define FILE_WRAPPER_INPUT_H__ + +#include "llvm/Support/support_macros.h" +#include "llvm/Wrap/wrapper_input.h" + +#include <stdio.h> +#include <string> + +// Define a class to wrap named files. +class FileWrapperInput : public WrapperInput { + public: + FileWrapperInput(const std::string& name); + ~FileWrapperInput(); + // Tries to read the requested number of bytes into the buffer. Returns the + // actual number of bytes read. + virtual size_t Read(uint8_t* buffer, size_t wanted); + // Returns true if at end of file. Note: May return false + // until Read is called, and returns 0. 
+ virtual bool AtEof(); + // Returns the size of the file (in bytes). + virtual off_t Size(); + // Moves to the given offset within the file. Returns + // false if unable to move to that position. + virtual bool Seek(uint32_t pos); + private: + // The name of the file. + std::string _name; + // True once eof has been encountered. + bool _at_eof; + // True if size has been computed. + bool _size_found; + // The size of the file. + off_t _size; + // The corresponding (opened) file. + FILE* _file; + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(FileWrapperInput); +}; + +#endif // FILE_WRAPPER_INPUT_H__ diff --git a/include/llvm/Wrap/file_wrapper_output.h b/include/llvm/Wrap/file_wrapper_output.h new file mode 100644 index 0000000000..714bd36a75 --- /dev/null +++ b/include/llvm/Wrap/file_wrapper_output.h @@ -0,0 +1,34 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Defines utility allowing files for bitcode output wrapping. + +#ifndef FILE_WRAPPER_OUTPUT_H__ +#define FILE_WRAPPER_OUTPUT_H__ + +#include "llvm/Support/support_macros.h" +#include "llvm/Wrap/wrapper_output.h" +#include <stdio.h> +#include <string> + +// Define a class to wrap named files. */ +class FileWrapperOutput : public WrapperOutput { + public: + FileWrapperOutput(const std::string& name); + ~FileWrapperOutput(); + // Writes a single byte, returning false if unable to write. + virtual bool Write(uint8_t byte); + // Writes the specified number of bytes in the buffer to + // output. Returns false if unable to write. + virtual bool Write(const uint8_t* buffer, size_t buffer_size); + private: + // The name of the file + std::string _name; + // The corresponding (opened) file. + FILE* _file; + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(FileWrapperOutput); +}; +#endif // FILE_WRAPPER_OUTPUT_H__ diff --git a/include/llvm/Wrap/wrapper_input.h b/include/llvm/Wrap/wrapper_input.h new file mode 100644 index 0000000000..cde918083a --- /dev/null +++ b/include/llvm/Wrap/wrapper_input.h @@ -0,0 +1,38 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Define a generic interface to a file/memory region that contains +// a bitcode file, a wrapped bitcode file, or a data file to wrap. + +#ifndef LLVM_WRAP_WRAPPER_INPUT_H__ +#define LLVM_WRAP_WRAPPER_INPUT_H__ + +#include <stdint.h> +#include <sys/types.h> + +#include "llvm/Support/support_macros.h" + +// The following is a generic interface to a file/memory region that contains +// a bitcode file, a wrapped bitcode file, or data file to wrap. +class WrapperInput { + public: + WrapperInput() {} + virtual ~WrapperInput() {} + // Tries to read the requested number of bytes into the buffer. Returns the + // actual number of bytes read. + virtual size_t Read(uint8_t* buffer, size_t wanted) = 0; + // Returns true if at end of input. Note: May return false until + // Read is called, and returns 0. + virtual bool AtEof() = 0; + // Returns the size of the input (in bytes). + virtual off_t Size() = 0; + // Moves to the given offset within the input region. Returns false + // if unable to move to that position. 
+ virtual bool Seek(uint32_t pos) = 0; + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(WrapperInput); +}; + +#endif // LLVM_WRAP_WRAPPER_INPUT_H__ diff --git a/include/llvm/Wrap/wrapper_output.h b/include/llvm/Wrap/wrapper_output.h new file mode 100644 index 0000000000..7045705991 --- /dev/null +++ b/include/llvm/Wrap/wrapper_output.h @@ -0,0 +1,34 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Defines a generic interface to a file/memory region that +// contains a generated wrapped bitcode file, bitcode file, +// or data file. + +#ifndef LLVM_WRAP_WRAPPER_OUTPUT_H__ +#define LLVM_WRAP_WRAPPER_OUTPUT_H__ + +#include <stdint.h> +#include <stddef.h> + +#include "llvm/Support/support_macros.h" + +// The following is a generic interface to a file/memory region +// that contains a generated bitcode file, wrapped bitcode file, +// or a data file. +class WrapperOutput { + public: + WrapperOutput() {} + virtual ~WrapperOutput() {} + // Writes a single byte, returning false if unable to write. + virtual bool Write(uint8_t byte) = 0; + // Writes the specified number of bytes in the buffer to + // output. Returns false if unable to write. + virtual bool Write(const uint8_t* buffer, size_t buffer_size); + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(WrapperOutput); +}; + +#endif // LLVM_WRAP_WRAPPER_OUTPUT_H__ diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 279343c48c..96b3925ed7 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1544,6 +1544,14 @@ bool BitcodeReader::ParseModule(bool Resume) { std::string S; if (ConvertToString(Record, 0, S)) return Error("Invalid MODULE_CODE_TRIPLE record"); + + // @LOCALMOD-BEGIN + // This hack is needed in order to get Clang compiled binaries + // working with the Gold plugin, until PNaCl backend is introduced + // in lib/Target/PNaCl. + if (S == "le32-unknown-nacl") + S = "armv7-none-linux-gnueabi"; + // @LOCALMOD-END TheModule->setTargetTriple(S); break; } @@ -2831,6 +2839,16 @@ bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { const Function *F = dyn_cast<Function>(GV); if (!F || F->isDeclaration()) return false; + // @LOCALMOD-START + // Don't dematerialize functions with BBs which have their address taken; + // it will cause any referencing blockAddress constants to also be destroyed, + // but because they are GVs, they need to stay around until PassManager + // finalization. + for (Function::const_iterator BB = F->begin(); BB != F->end(); ++BB) { + if (BB->hasAddressTaken()) + return false; + } + // @LOCALMOD-END return DeferredFunctionInfo.count(const_cast<Function*>(F)); } @@ -2980,6 +2998,9 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, return 0; } R->setBufferOwned(false); // no buffer to delete + + R->materializeForwardReferencedFunctions(); + return M; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d74a70362a..b4f0b174b5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -156,6 +156,11 @@ bool AsmPrinter::doInitialization(Module &M) { MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MMI->AnalyzeModule(M); + // @LOCALMOD-BEGIN + IsPlainObject = + (MMI->getModule()->getOutputFormat() == Module::ObjectOutputFormat); + // @LOCALMOD-END + // Initialize TargetLoweringObjectFile. 
const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); @@ -272,6 +277,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *GVSym = Mang->getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + // @LOCALMOD-BEGIN + // For .pexe and .pso files, emit ELF type STT_OBJECT or STT_TLS instead + // of NOTYPE for undefined symbols. + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2527 + if (!GV->hasInitializer() && !IsPlainObject) { + OutStreamer.EmitSymbolAttribute(GVSym, + GV->isThreadLocal() ? MCSA_ELF_TypeTLS + : MCSA_ELF_TypeObject); + } + // @LOCALMOD-END + if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -681,9 +697,14 @@ void AsmPrinter::EmitFunctionBody() { break; case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: + case TargetOpcode::GC_LABEL: { + // @LOCALMOD-START + unsigned LabelAlign = GetTargetLabelAlign(II); + if (LabelAlign) EmitAlignment(LabelAlign); + // @LOCALMOD-END OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); break; + } case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; @@ -699,6 +720,20 @@ void AsmPrinter::EmitFunctionBody() { case TargetOpcode::KILL: if (isVerbose()) emitKill(II, *this); break; + // @LOCALMOD-BEGIN + case TargetOpcode::BUNDLE_ALIGN_START: + OutStreamer.EmitBundleAlignStart(); + break; + case TargetOpcode::BUNDLE_ALIGN_END: + OutStreamer.EmitBundleAlignEnd(); + break; + case TargetOpcode::BUNDLE_LOCK: + OutStreamer.EmitBundleLock(); + break; + case TargetOpcode::BUNDLE_UNLOCK: + OutStreamer.EmitBundleUnlock(); + break; + // @LOCALMOD-END default: if (!TM.hasMCUseLoc()) MCLineEntry::Make(&OutStreamer, getCurrentSection()); @@ -848,6 +883,16 @@ bool AsmPrinter::doFinalization(Module &M) { const Function &F = *I; if (!F.isDeclaration()) continue; + + // @LOCALMOD-BEGIN + // For .pexe and .pso files, emit STT_FUNC for function declarations. + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2527 + if (!IsPlainObject) { + OutStreamer.EmitSymbolAttribute(Mang->getSymbol(&F), + MCSA_ELF_TypeFunction); + } + // @LOCALMOD-END + GlobalValue::VisibilityTypes V = F.getVisibility(); if (V == GlobalValue::DefaultVisibility) continue; @@ -1065,12 +1110,25 @@ void AsmPrinter::EmitJumpTableInfo() { if (// In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. // FIXME: Need a better predicate for this: what about custom entries? - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || + (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || // We should also do if the section name is NULL or function is declared // in discardable section // FIXME: this isn't the right predicate, should be based on the MCSection // for the function. - F->isWeakForLinker()) { + // @LOCALMOD-START + // the original code is a hack + // jumptables usually end up in .rodata + // but for functions with weak linkage there is a chance that the are + // not needed. So in order to be discard the function AND the jumptable + // they keep them both in .text. This fix only works if we never discard + // weak functions. This is guaranteed because the bitcode linker already + // throws out unused ones. + // TODO: Investigate the other case of concern -- PIC code. + // Concern is about jumptables being in a different section: can the + // rodata and text be too far apart for a RIP-relative offset? 
+ F->isWeakForLinker()) + && !UseReadOnlyJumpTables()) { + // @LOCALMOD-END OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); } else { // Otherwise, drop it in the readonly section. @@ -1097,7 +1155,7 @@ void AsmPrinter::EmitJumpTableInfo() { // .set directive for each unique entry. This reduces the number of // relocations the assembler will generate for the jump table. if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && - MAI->hasSetDirective()) { + MAI->hasSetDirective() && !UseReadOnlyJumpTables()) { // @LOCALMOD SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; const TargetLowering *TLI = TM.getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); @@ -1180,7 +1238,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If we have emitted set directives for the jump table entries, print // them rather than the entries themselves. If we're emitting PIC, then // emit the table entries as differences between two text section labels. - if (MAI->hasSetDirective()) { + if (MAI->hasSetDirective() && !UseReadOnlyJumpTables()) { // @LOCALMOD // If we used .set, reference the .set's symbol. Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), OutContext); @@ -1200,7 +1258,6 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } - /// EmitSpecialLLVMGlobal - Check to see if the specified global is a /// special global used by LLVM. If so, emit it and return true, otherwise /// do nothing and return false. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 367b523079..22535fe5b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -571,7 +571,8 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, - StringRef DirName) { + StringRef DirName, + StringRef Extra) { // @LOCALMOD // If FE did not provide a file name, then assume stdin. if (FileName.empty()) return GetOrCreateSourceID("<stdin>", StringRef()); @@ -587,6 +588,9 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, NamePair += DirName; NamePair += '\0'; // Zero bytes are not allowed in paths. NamePair += FileName; + // @LOCALMOD + NamePair += '\0'; // Zero bytes are not allowed in paths. + NamePair += Extra; StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId); if (Ent.getValue() != SrcId) @@ -598,13 +602,37 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, return SrcId; } +// @LOCALMOD-BEGIN +// A special version of GetOrCreateSourceID for CompileUnits. +// It is possible that with bitcode linking, we end up with distinct +// compile units based on the same source file. +// E.g., compile foo.c with -DMACRO1 to foo1.bc, then compile +// foo.c again with -DMACRO2 to foo2.bc and link. +// We use additional information to form a unique ID in that case. +unsigned DwarfDebug::GetOrCreateCompileUnitID(StringRef Filename, + StringRef Dirname, + const MDNode *N) { + std::string DIUnitStr; + raw_string_ostream ostr(DIUnitStr); + + // Using information from the compile unit (N)'s getEnumTypes(), + // getRetainedTypes(), getSubprograms(), getGlobalVariables() + // could be pretty expensive. 
+ // Cheat and use the MDNode's address as an additional identifying factor. + // constructCompileUnit() is only called once per compile unit. + ostr << static_cast<const void*>(N); + return GetOrCreateSourceID(Filename, Dirname, ostr.str()); +} +// @LOCALMOD-END + /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN, CompilationDir); + // @LOCALMOD + unsigned ID = GetOrCreateCompileUnitID(FN, CompilationDir, N); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 61d9a51a52..475c6f86d9 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -524,7 +524,16 @@ public: /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. - unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName); + unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName, + StringRef Extra = ""); // @LOCALMOD for Extra + + // @LOCALMOD-BEGIN - Create an ID for CompileUnits, taking extra care + // in the case that we have multiple compile units coming from the + // same source file and directory. + unsigned GetOrCreateCompileUnitID(StringRef FileName, StringRef DirName, + const MDNode *N); + // @LOCALMOD-END + /// getStringPool - returns the entry into the start of the pool. MCSymbol *getStringPool(); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 6f4c5a2f66..90f6eec831 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -20,6 +20,7 @@ #include "BranchFolding.h" #include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineConstantPool.h" // @LOCALMOD #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -234,6 +235,21 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, } } + // @LOCALMOD-START + // This currently only used on ARM targets where the ConstantPool + // subclass is overloading getJumpTableIndex() + const std::vector<MachineConstantPoolEntry>& CPs = + MF.getConstantPool()->getConstants(); + for (unsigned i = 0, e = CPs.size(); i != e; ++i) { + if (!CPs[i].isMachineConstantPoolEntry()) continue; + unsigned *JTIndex = CPs[i].Val.MachineCPVal->getJumpTableIndex(); + if (!JTIndex) continue; + // Remember that this JT is live. + JTIsLive.set(*JTIndex); + } + // @LOCALMOD-END + + // Finally, remove dead jump tables. This happens when the // indirect jump was unreachable (and thus deleted). for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 22b9140924..6ae07dfb0b 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -33,6 +33,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, StackOffset = 0; clearFirstByValReg(); + clearHasByValInRegPosition(); // @LOCALMOD. 
UsedRegs.resize((TRI.getNumRegs()+31)/32); } diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 6120ae56b4..b7c9f17df9 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -92,6 +92,46 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, # define setjmp_undefined_for_msvc #endif +// @LOCALMOD-BEGIN +// Calls to these functions may materialize as part of a conversion +// from an intrinsics, e.g. llvm.memset -> memset +// So if these functions are available in bitcode form we need to: +// * make sure they do not get discarded -- if there is a chance that +// a caller might materialize +// * make sure they do not get specialized for a given callsite +// Both problems are avoided by pretending there are unknown callers. +// The function: IntrinsicLowering::AddPrototypes() below does just that. +// TODO(robertm): elaborate some more +static const char *IntrinsicNames[] = { + "abort", + "memcpy", "memset", "memmove", + "sqrtf", "sqrt", "sqrtl", + "sinf", "sin", "sinl", + "cosf", "cos", "cosl", + "powf", "pow", "powl", + "logf", "log", "logl", + "log2f", "log2", "log2l", + "log10f", "log10", "log10l", + "expf", "exp", "expl", + "exp2f", "exp2", "exp2l", + NULL +}; + +StringSet<> IntrinsicLowering::FuncNames; + +const StringSet<> &IntrinsicLowering::GetFuncNames() { + if (FuncNames.empty()) { + for (unsigned i=0; IntrinsicNames[i]; ++i) + FuncNames.insert(IntrinsicNames[i]); + } + return FuncNames; +} + +bool IntrinsicLowering::IsCalledByIntrinsic(const StringRef &FuncName) { + return IntrinsicLowering::GetFuncNames().count(FuncName) > 0; +} +// @LOCALMOD-END + void IntrinsicLowering::AddPrototypes(Module &M) { LLVMContext &Context = M.getContext(); for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 18d021d521..34b24b6085 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -164,7 +164,8 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator B = begin(), E = end(), I = E; - while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + while (I != B && ((--I)->isTerminator() || I->isDebugValue() + || I->getOpcode() == TargetOpcode::BUNDLE_UNLOCK)) // @LOCALMOD ; /*noop */ while (I != E && !I->isTerminator()) ++I; diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 4ea21d4ff7..7c7d2c8045 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -352,6 +352,16 @@ void TargetPassConfig::addIRPasses() { addPass(createTypeBasedAliasAnalysisPass()); addPass(createBasicAliasAnalysisPass()); + // @LOCALMOD-START + addPass(createNaClCcRewritePass(TM->getTargetLowering())); + // TODO: consider adding a cleanup pass, e.g. constant propagation + // Note: we run this before the verfier step because it may cause + // a *temporary* inconsistency: + // A function may have been rewritting before we are rewriting + // its callers - which would lead to a parameter mismatch complaint + // from the verifier. + // @LOCALMOD-END + // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. 
if (!DisableVerify) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3fbf7c2fe6..be3168618e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5278,6 +5278,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::donothing: // ignore return 0; + // @LOCALMOD-BEGIN + // Native Client Intrinsics for TLS setup / layout. + case Intrinsic::nacl_tp_tls_offset: { + SDValue tls_size = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::NACL_TP_TLS_OFFSET, dl, + tls_size.getValueType(), + tls_size)); + return 0; + } + case Intrinsic::nacl_tp_tdb_offset: { + SDValue tdb_size = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::NACL_TP_TDB_OFFSET, dl, + tdb_size.getValueType(), + tdb_size)); + return 0; + } + case Intrinsic::nacl_target_arch: { + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::NACL_TARGET_ARCH, dl, DestVT)); + return 0; + } + // @LOCALMOD-END } } @@ -6454,7 +6476,10 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); +// @LOCALMOD-BEGIN + TD.getCallFrameTypeAlignment(I.getType())); +// @LOCALMOD-END + setValue(&I, V); DAG.setRoot(V.getValue(1)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6f3ce7a44b..a870ee2ac8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -313,6 +313,13 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE: return "setfalse"; case ISD::SETFALSE2: return "setfalse2"; } + + // @LOCALMOD-BEGIN + // NaCl intrinsics for TLS setup + case ISD::NACL_TP_TLS_OFFSET: return "nacl_tls_offset"; + case ISD::NACL_TP_TDB_OFFSET: return "nacl_tdb_offset"; + case ISD::NACL_TARGET_ARCH: return "nacl_target_arch"; + // @LOCALMOD-END } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c314fa5b51..20afa3def3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -564,7 +564,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. @@ -593,7 +592,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); - // Run the DAG combiner in post-type-legalize mode. { NamedRegionTimer T("DAG Combining after legalize types", GroupName, @@ -615,10 +613,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } - if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); - // Run the DAG combiner in post-type-legalize mode. 
{ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, @@ -629,19 +625,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); - // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 8f5d770f66..6df4a0aa2a 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -55,8 +55,16 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, case dwarf::DW_EH_PE_absptr: return Mang->getSymbol(GV); case dwarf::DW_EH_PE_pcrel: { + // @LOCALMOD-BEGIN + // The dwarf section label should not include the version suffix. + // Strip it off here. + StringRef Name = Mang->getSymbol(GV)->getName(); + size_t atpos = Name.find("@"); + if (atpos != StringRef::npos) + Name = Name.substr(0, atpos); + // @LOCALMOD-END return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - Mang->getSymbol(GV)->getName()); + Name); // @LOCALMOD } } } @@ -65,7 +73,15 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); - NameData += Sym->getName(); + // @LOCALMOD-BEGIN + // The dwarf section label should not include the version suffix. + // Strip it off here. + StringRef Name = Sym->getName(); + size_t atpos = Name.find("@"); + if (atpos != StringRef::npos) + Name = Name.substr(0, atpos); + // @LOCALMOD-END + NameData += Name; // @LOCALMOD MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index e16e2d112a..e3b90fdf78 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -322,7 +322,9 @@ GenericValue lle_X_abort(FunctionType *FT, const std::vector<GenericValue> &Args) { //FIXME: should we report or raise here? 
//report_fatal_error("Interpreted program raised SIGABRT"); - raise (SIGABRT); + //TODO(dschuff) fixme or figure out how to get raise() + abort(); // @LOCALMOD + //raise (SIGABRT); return GenericValue(); } diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index 2ae155bebf..338db8f454 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -210,6 +210,8 @@ public: private: static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, TargetMachine &tm); + // Native client needs its own memory manager, so custom ones are unsupported + static JITCodeEmitter *createNaClEmitter(JIT &J, TargetMachine &tm); void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked); void updateFunctionStub(Function *F); void jitTheFunction(Function *F, const MutexGuard &locked); diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index ecafda7286..1c5abf751d 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -30,6 +30,7 @@ #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/NaClJITMemoryManager.h" #include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetJITInfo.h" @@ -52,12 +53,15 @@ #ifndef NDEBUG #include <iomanip> #endif +#ifdef __native_client__ +#include <nacl/nacl_dyncode.h> +#endif using namespace llvm; STATISTIC(NumBytes, "Number of bytes of machine code compiled"); STATISTIC(NumRelos, "Number of relocations applied"); STATISTIC(NumRetries, "Number of retries with more memory"); - +STATISTIC(NumNopBytes, "Number of bytes of NOPs emitted"); // A declaration may stop being a declaration once it's fully read from bitcode. // This function returns true if F is fully read and is still a declaration. @@ -281,8 +285,6 @@ namespace { /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is /// used to output functions to memory for execution. class JITEmitter : public JITCodeEmitter { - JITMemoryManager *MemMgr; - // When outputting a function stub in the context of some other function, we // save BufferBegin/BufferEnd/CurBufferPtr here. uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; @@ -292,11 +294,13 @@ namespace { // ask the memory manager for at least this much space. When we // successfully emit the function, we reset this back to zero. uintptr_t SizeEstimate; - +protected: //TODO:(dschuff): fix/move this once we do validation and are sure + // which functions/data we need in NaClJITEmitter. also add LOCALMOD + JITMemoryManager *MemMgr; /// Relocations - These are the relocations that the function needs, as /// emitted. std::vector<MachineRelocation> Relocations; - +private: /// MBBLocations - This vector is a mapping from MBB ID's to their address. /// It is filled in by the StartMachineBasicBlock callback and queried by /// the getMachineBasicBlockAddress callback. 
@@ -380,7 +384,7 @@ namespace { DE.reset(new JITDwarfEmitter(jit)); } } - ~JITEmitter() { + virtual ~JITEmitter() { // @LOCALMOD delete MemMgr; } @@ -393,10 +397,10 @@ namespace { void initJumpTableInfo(MachineJumpTableInfo *MJTI); void emitJumpTableInfo(MachineJumpTableInfo *MJTI); - void startGVStub(const GlobalValue* GV, + virtual void startGVStub(const GlobalValue* GV, unsigned StubSize, unsigned Alignment = 1); - void startGVStub(void *Buffer, unsigned StubSize); - void finishGVStub(); + virtual void startGVStub(void *Buffer, unsigned StubSize); + virtual void finishGVStub(); virtual void *allocIndirectGV(const GlobalValue *GV, const uint8_t *Buffer, size_t Size, unsigned Alignment); @@ -468,6 +472,360 @@ namespace { bool MayNeedFarStub); void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference); }; + + // @LOCALMOD-START + class NaClJITEmitter : public JITEmitter { + /* There are two Nacl-specific requirements that must be dealt with: the + * first is that the data and code spaces are strictly separated, and code + * must be copied (by the service runtime/validator)to its destination + * after emission and relocation have finished. + * The second is bundle alignment: neither instructions nor multi- + * instruction pseudoinstruction groups may cross bundle boundaries. + * + * Requirement 1 is dealt with jointly by NaClJITMemoryManager and + * and NaClJITEmitter. NaClJITMemoryManager separates metadata from + * code and returns pointers in the proper space + * for code (startFunctionBody, allocateStub) and data (allocateSpace, + * startExceptionTable, etc). NaClJITEmitter emits code into a separate + * memory buffer (EmissionBuffer). After startFunction allocates the + * function's memory, NaClJITEmitter's startFunction points BufferBegin, + * CurBufferPtr and BufferEnd at the EmissionBuffer (this avoids having to + * override all of the actual emission methods from JITCodeEmitter) + * JITEmitter already uses this trick for emitting a stub in the middle + * of emitting a function so it doesn't seem so terrible to do our own + * similar swapping of the pointers. + * + * Requirement 2 is bundle alignment. + * X86CodeEmitter makes several calls into JITCodeEmitter per instruction, + * to add the various bytes, constants, etc. To implement bundle alignment, + * we add methods to start and end a bundle-locked group + * (the group can include just one instruction or several). + * The X86CodeEmitter will pass-through any such markers created by the + * rewriting passes (which surround multiple-instruction groups), + * and will also generate them surrounding each individual instruction + * (there should never be more than two-deep nesting). + * When beginBundleLock is called, the CurBufferPtr is marked. When + * endBundleLock is called, it checks that the group does not cross a + * bundle boundary; if it does, it inserts nop padding as necessary. + * If padding is added, the relocations must also be fixed up; this also + * happens in endBundleLock. 
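+   * For illustration, with a 32-byte bundle: a 10-byte locked group that
+   * starts at offset 58 would cross the bundle boundary at 64, so
+   * endBundleLock inserts 6 bytes of nop padding and the group moves to
+   * [64, 74); any relocations recorded inside the group are shifted by the
+   * same 6 bytes. A group marked with alignToBundleEnd is shifted further
+   * so that it ends exactly on the next bundle boundary.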
+ * + */ + public: + NaClJITEmitter(JIT &jit, TargetMachine &TM) : + JITEmitter(jit, new NaClJITMemoryManager(), TM), + BundleLockSavedCurBufferPtr(NULL), + BundleNestCount(0), + AlignNextGroup(kNone), + GroupRelocationCount(0), + JITInfo(&jit.getJITInfo()), + kBundleSize(jit.getJITInfo().getBundleSize()), + kJumpMask(jit.getJITInfo().getJumpMask()) { + uintptr_t CodeSlabSize = MemMgr->GetDefaultCodeSlabSize(); + EmissionBuffer = MemMgr->allocateSpace(CodeSlabSize, kBundleSize); + EmissionBufferSize = CodeSlabSize; + DEBUG(dbgs() << "EmissionBuffer " << EmissionBuffer << " size " + << EmissionBufferSize << "\n"); + StubEmissionBuffer = MemMgr->allocateSpace(kBundleSize, kBundleSize); + StubEmissionBufferSize = kBundleSize; + DEBUG(dbgs() << "StubEmissionBuffer " << StubEmissionBuffer << " size " + << StubEmissionBufferSize << "\n"); + JITInfo = &jit.getJITInfo(); + } + + virtual ~NaClJITEmitter() { + } + + static inline bool classof(const JITEmitter*) { return true; } + + virtual void startFunction(MachineFunction &F) { + JITEmitter::startFunction(F); + // Make sure the emission buffer is at least as big as the allocated + // function + if (BufferEnd - BufferBegin > (intptr_t)EmissionBufferSize) { + EmissionBufferSize = std::max((uintptr_t)(BufferEnd - BufferBegin), + 2 * EmissionBufferSize); + // BumpPtrAllocator doesn't do anything when you call Deallocate. it + // will be freed on destruction + EmissionBuffer = MemMgr->allocateSpace(EmissionBufferSize, + kBundleSize); + DEBUG(dbgs() << "new EmissionBuffer " << EmissionBuffer << " size " + << EmissionBufferSize << "\n"); + } + // We ensure that the emission buffer is bundle-aligned, and constant + // pool emission should not go into code space + assert((CurBufferPtr == BufferBegin || + (int)F.getFunction()->getAlignment() > kBundleSize) && + "Pre-function data should not be emitted into code space"); + if (CurBufferPtr > BufferBegin) { + // If CurBufferPtr has been bumped forward for alignment, we need to + // pad the space with nops + memcpy(EmissionBuffer, + JITInfo->getNopSequence(CurBufferPtr - BufferBegin), + CurBufferPtr - BufferBegin); + NumNopBytes += CurBufferPtr - BufferBegin; + } + FunctionDestination = BufferBegin; + setBufferPtrs(EmissionBuffer); + } + + virtual bool finishFunction(MachineFunction &F) { + uint8_t *end = CurBufferPtr; + emitAlignment(kBundleSize); + memcpy(end, JITInfo->getNopSequence(CurBufferPtr - end), + CurBufferPtr - end); + NumNopBytes += CurBufferPtr - end; + JITInfo->setRelocationBuffer(BufferBegin); + assert(BufferBegin == EmissionBuffer); + int FunctionSize = CurBufferPtr - BufferBegin; + setBufferPtrs(FunctionDestination); + bool result = JITEmitter::finishFunction(F); + // If we ran out of memory, don't bother validating, we'll just retry + if (result) return result; + + DEBUG({ + dbgs() << "Validating " << FunctionDestination << "-" << + FunctionDestination + FunctionSize << "\n"; + if (sys::hasDisassembler()) { + dbgs() << "Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(EmissionBuffer, + FunctionSize, + (uintptr_t)FunctionDestination); + } else { + dbgs() << "Binary code:\n"; + uint8_t* q = BufferBegin; + for (int i = 0; q < CurBufferPtr; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + dbgs() << "JIT: " << (long)(q - BufferBegin) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= CurBufferPtr) + Done = true; + else + dbgs() << (unsigned short)q[j]; + } + if (Done) + break; + dbgs() << ' '; + if (i == 3) + dbgs() << '\n'; + } + dbgs()<< '\n'; + } + }); 
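+    // The finished bytes are handed to the NaCl service runtime here:
+    // nacl_dyncode_create() validates the contents of EmissionBuffer and,
+    // only if validation succeeds, copies them into executable memory at
+    // FunctionDestination; the JIT never writes to the code region directly.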
+#ifdef __native_client__ + if(nacl_dyncode_create(FunctionDestination, EmissionBuffer, + FunctionSize) != 0) { + report_fatal_error("NaCl validation failed"); + } +#endif + return result; + } + + virtual void startGVStub(const GlobalValue* GV, + unsigned StubSize, unsigned Alignment = 1) { + JITEmitter::startGVStub(GV, StubSize, Alignment); + ReusedStub = false; + assert(StubSize <= StubEmissionBufferSize); + StubDestination = BufferBegin; + setBufferPtrs(StubEmissionBuffer); + } + virtual void startGVStub(void *Buffer, unsigned StubSize) { + JITEmitter::startGVStub(Buffer, StubSize); + ReusedStub = true; + assert(StubSize <= StubEmissionBufferSize); + StubDestination = BufferBegin; + setBufferPtrs(StubEmissionBuffer); + } + virtual void finishGVStub() { + assert(CurBufferPtr - BufferBegin == kBundleSize); + + DEBUG(dbgs() << "Validating "<< BufferBegin<<"-"<<StubDestination<<"\n"); + int ValidationResult; +#ifdef __native_client__ + if (!ReusedStub) { + ValidationResult = nacl_dyncode_create(StubDestination, BufferBegin, + CurBufferPtr - BufferBegin); + } else { + // This is not a thread-safe modification because it updates the whole + // stub rather than just a jump target. However it is only used by + // eager compilation to replace a stub which is not in use yet + // (it jumps to 0). + ValidationResult = nacl_dyncode_modify(StubDestination, BufferBegin, + CurBufferPtr - BufferBegin); + } +#endif + if (ValidationResult) { + dbgs() << "NaCl stub validation failed:\n"; + if (sys::hasDisassembler()) { + dbgs() << "Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(BufferBegin, + CurBufferPtr-BufferBegin, + (uintptr_t)StubDestination); + } + report_fatal_error("Stub validation failed"); + } + setBufferPtrs(StubDestination); + JITEmitter::finishGVStub(); + } + + /// allocateSpace - Allocates *data* space, rather than space in the + // current code block. + virtual void *allocateSpace(uintptr_t Size, unsigned Alignment) { + return MemMgr->allocateSpace(Size, Alignment); + } + + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) { + uint8_t *end = CurBufferPtr; + emitAlignment(MBB->getAlignment()); + memcpy(end, JITInfo->getNopSequence(CurBufferPtr - end), + CurBufferPtr - end); + NumNopBytes += CurBufferPtr - end; + JITEmitter::StartMachineBasicBlock(MBB); + } + + /// beginBundleLock - Save the current location of CurBufferPtr so we can + // tell if the block crosses a bundle boundary + virtual void beginBundleLock() { + assert(BundleNestCount <= 2 && "Bundle-locked groups can't be nested"); + if (++BundleNestCount == 2) return; + DEBUG(dbgs() << "begin lock, buffer begin:end:cur "<<BufferBegin<<" "<< + BufferEnd<< " "<<CurBufferPtr << "\n"); + BundleLockSavedCurBufferPtr = CurBufferPtr; + GroupRelocationCount = 0; + } + + /// endBundleLock - Check if the group crosses a bundle boundary. If so + // (or if the group must be aligned to the end of a bundle), move the + // group and add appropriate padding + virtual void endBundleLock() { + assert(BundleNestCount > 0 && "mismatched bundle-lock start/end"); + if (--BundleNestCount > 0) return; + DEBUG(dbgs() <<"end lock, buffer begin:end:cur:savd "<<BufferBegin<<" "<< + BufferEnd<< " "<<CurBufferPtr <<" "<< + BundleLockSavedCurBufferPtr<<"\n"); + + int GroupLen = CurBufferPtr - BundleLockSavedCurBufferPtr; + if (BufferEnd - CurBufferPtr < + GroupLen + kBundleSize) { + // Added padding can be no more than kBundleSize. 
Retry if there's any + // possibility of overflow + CurBufferPtr = BufferEnd; + AlignNextGroup = kNone; + return; + } + // Space left in the current bundle + int SpaceLeft = (((intptr_t)BundleLockSavedCurBufferPtr + kBundleSize) + & kJumpMask) - (intptr_t)BundleLockSavedCurBufferPtr; + int TotalPadding = 0; + if (SpaceLeft < GroupLen || AlignNextGroup == kBegin) { + DEBUG(dbgs() << "space " << SpaceLeft <<" len "<<GroupLen<<"\n"); + memmove(BundleLockSavedCurBufferPtr + SpaceLeft, + BundleLockSavedCurBufferPtr, GroupLen); + memcpy(BundleLockSavedCurBufferPtr, JITInfo->getNopSequence(SpaceLeft), + SpaceLeft); + NumNopBytes += SpaceLeft; + assert(CurBufferPtr == BundleLockSavedCurBufferPtr + GroupLen); + CurBufferPtr += SpaceLeft; + BundleLockSavedCurBufferPtr += SpaceLeft; + TotalPadding = SpaceLeft; + SpaceLeft = kBundleSize; + } + + if (AlignNextGroup == kEnd) { + DEBUG(dbgs() << "alignend, space len "<<SpaceLeft<<" "<<GroupLen<<"\n"); + int MoveDistance = SpaceLeft - GroupLen; + memmove(BundleLockSavedCurBufferPtr + MoveDistance, + BundleLockSavedCurBufferPtr, GroupLen); + memcpy(BundleLockSavedCurBufferPtr, + JITInfo->getNopSequence(MoveDistance), MoveDistance); + NumNopBytes += MoveDistance; + CurBufferPtr += MoveDistance; + TotalPadding += MoveDistance; + } + + AlignNextGroup = kNone; + + assert(CurBufferPtr <= BufferEnd && "Bundled group caused buf overflow"); + if (TotalPadding && GroupRelocationCount) { + assert(Relocations.size() >= GroupRelocationCount && + "Too many relocations recorded for this group"); + for(std::vector<MachineRelocation>::reverse_iterator I = + Relocations.rbegin(); GroupRelocationCount > 0; + ++I, GroupRelocationCount--) { + int NewOffset = I->getMachineCodeOffset() + + TotalPadding; + I->setMachineCodeOffset(NewOffset); + } + } + } + + virtual void alignToBundleBeginning() { + // mark that the next locked group must be aligned to bundle start + // (e.g. an indirect branch target) + assert(AlignNextGroup == kNone && "Conflicting group alignments"); + AlignNextGroup = kBegin; + } + + virtual void alignToBundleEnd() { + // mark that the next locked group must be aligned to bundle end (e.g. a + // call) + assert(AlignNextGroup == kNone && "Conflicting group alignments"); + AlignNextGroup = kEnd; + } + + virtual uintptr_t getCurrentPCValue() const { + // return destination PC value rather than generating location + if (BufferBegin == EmissionBuffer) { + return (uintptr_t)(FunctionDestination + (CurBufferPtr - BufferBegin)); + } else if (BufferBegin == StubEmissionBuffer) { + return (uintptr_t)(StubDestination + (CurBufferPtr - BufferBegin)); + } else { + return (uintptr_t)CurBufferPtr; + } + } + + // addRelocation gets called in the middle of emitting an instruction, and + // creates the relocation based on the instruction's current position in + // the emission buffer; however it could get moved if it crosses the bundle + // boundary. so we intercept relocation creation and adjust newly-created + // relocations if necessary + virtual void addRelocation(const MachineRelocation &MR) { + GroupRelocationCount++; + JITEmitter::addRelocation(MR); + } + + private: + typedef enum _GroupAlign { kNone, kBegin, kEnd } GroupAlign; + // FunctionDestination points to the final destination for the function + // (i.e. 
where it will be copied after validation) + uint8_t *FunctionDestination; + uint8_t *BundleLockSavedCurBufferPtr; + int BundleNestCount; // should not exceed 2 + GroupAlign AlignNextGroup; + unsigned GroupRelocationCount; + uint8_t *EmissionBuffer; + uintptr_t EmissionBufferSize; + + bool ReusedStub; + uint8_t *StubDestination; + uint8_t *StubEmissionBuffer; + uintptr_t StubEmissionBufferSize; + + TargetJITInfo *JITInfo; + const int kBundleSize; + const int32_t kJumpMask; + + // Set the buffer pointers (begin, cur, end) so they point into the buffer + // at dest, preserving their relative positions + void setBufferPtrs(uint8_t* dest) { + BufferEnd = dest + (BufferEnd - BufferBegin); + CurBufferPtr = dest + (CurBufferPtr - BufferBegin); + BufferBegin = dest; + } +}; } void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { @@ -934,6 +1292,12 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // Mark code region readable and executable if it's not so already. MemMgr->setMemoryExecutable(); + // @LOCALMOD-START +#ifndef __native_client__ + // In NaCl, we haven't yet validated and copied the function code to the + // destination yet, so there is nothing to disassemble. Furthermore we can't + // touch the destination because it may not even be mapped yet + // @LOCALMOD-END DEBUG({ if (sys::hasDisassembler()) { dbgs() << "JIT: Disassembled code:\n"; @@ -963,6 +1327,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { dbgs()<< '\n'; } }); +#endif // @LOCALMOD if (JITExceptionHandling) { uintptr_t ActualSize = 0; @@ -1247,7 +1612,14 @@ void JITEmitter::EmittedFunctionConfig::onRAUW( JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &tm) { +// @LOCALMOD-START +#ifndef __native_client__ return new JITEmitter(jit, JMM, tm); +#else + assert(!JMM && "NaCl does not support custom memory managers"); + return new NaClJITEmitter(jit, tm); +#endif +// @LOCALMOD-END } // getPointerToFunctionOrStub - If the specified function has been diff --git a/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp new file mode 100644 index 0000000000..d44fee2292 --- /dev/null +++ b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp @@ -0,0 +1,430 @@ +//===-- NaClJITMemoryManager.cpp - Memory Allocator for JIT'd code --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the NaClJITMemoryManager class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "llvm/ExecutionEngine/NaClJITMemoryManager.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Config/config.h" +#include <vector> + +#if defined(__linux__) || defined(__native_client__) +#if defined(HAVE_SYS_STAT_H) +#include <sys/stat.h> +#endif +#include <fcntl.h> +#include <unistd.h> +#endif + +using namespace llvm; + +#ifdef __native_client__ +// etext is guarded by ifdef so the code still compiles on non-ELF platforms +extern char etext; +#endif + +// The way NaCl linking is currently setup, there is a gap between the text +// segment and the rodata segment where we can fill dyncode. 
The text ends +// at etext, but there's no symbol for the start of rodata. Currently the +// linker script puts it at 0x11000000 +// If we run out of space there, we can also allocate below the text segment +// and keep going downward until we run into code loaded by the dynamic +// linker. (TODO(dschuff): make that work) +// For now, just start at etext and go until we hit rodata + +// It's an open issue that lazy jitting is not thread safe (PR5184). However +// NaCl's dyncode_create solves exactly this problem, so in the future +// this allocator could (should?) be made thread safe + +const size_t NaClJITMemoryManager::kStubSlabSize; +const size_t NaClJITMemoryManager::kDataSlabSize; +const size_t NaClJITMemoryManager::kCodeSlabSize; + +// TODO(dschuff) fix allocation start (etext + 64M is hopefully after where +// glibc is loaded) and limit (maybe need a linker-provide symbol for the start +// of the IRT or end of the segment gap) +// (also fix allocateCodeSlab and maybe allocateStubSlab at that time) +// what we really need is a usable nacl_dyncode_alloc(), but this could still +// be improved upon using dl_iterate_phdr +const static intptr_t kNaClSegmentGapEnd = 0x11000000; + +NaClJITMemoryManager::NaClJITMemoryManager() : + AllocatableRegionLimit((uint8_t *)kNaClSegmentGapEnd), + NextCode(AllocatableRegionStart), GOTBase(NULL) { +#ifdef __native_client__ + AllocatableRegionStart = (uint8_t *)&etext + 1024*1024*64; +#else + assert(false && "NaClJITMemoryManager will not work outside NaCl sandbox"); +#endif + AllocatableRegionStart = + (uint8_t *)RoundUpToAlignment((uint64_t)AllocatableRegionStart, + kBundleSize); + NextCode = AllocatableRegionStart; + + // Allocate 1 stub slab to get us started + CurrentStubSlab = allocateStubSlab(0); + InitFreeList(&CodeFreeListHead); + InitFreeList(&DataFreeListHead); + + DEBUG(dbgs() << "NaClJITMemoryManager: AllocatableRegionStart " << + AllocatableRegionStart << " Limit " << AllocatableRegionLimit << "\n"); +} + +NaClJITMemoryManager::~NaClJITMemoryManager() { + delete [] GOTBase; + DestroyFreeList(CodeFreeListHead); + DestroyFreeList(DataFreeListHead); +} + +FreeListNode *NaClJITMemoryManager::allocateCodeSlab(size_t MinSize) { + FreeListNode *node = new FreeListNode(); + if (AllocatableRegionLimit - NextCode < (int)kCodeSlabSize) { + // TODO(dschuff): might be possible to try the space below text segment? + report_fatal_error("Ran out of code space"); + } + node->address = NextCode; + node->size = std::max(kCodeSlabSize, MinSize); + NextCode += node->size; + DEBUG(dbgs() << "allocated code slab " << NextCode - node->size << "-" << + NextCode << "\n"); + return node; +} + +SimpleSlab NaClJITMemoryManager::allocateStubSlab(size_t MinSize) { + SimpleSlab s; + DEBUG(dbgs() << "allocateStubSlab: "); + // It's a little weird to just allocate and throw away the FreeListNode, but + // since code region allocation is still a bit ugly and magical, I decided + // it's better to reuse allocateCodeSlab than duplicate the logic. 
+ FreeListNode *n = allocateCodeSlab(MinSize); + s.address = n->address; + s.size = n->size; + s.next_free = n->address; + delete n; + return s; +} + +FreeListNode *NaClJITMemoryManager::allocateDataSlab(size_t MinSize) { + FreeListNode *node = new FreeListNode; + size_t size = std::max(kDataSlabSize, MinSize); + node->address = (uint8_t*)DataAllocator.Allocate(size, kBundleSize); + node->size = size; + return node; +} + +void NaClJITMemoryManager::InitFreeList(FreeListNode **Head) { + // Make sure there is always at least one entry in the free list + *Head = new FreeListNode; + (*Head)->Next = (*Head)->Prev = *Head; + (*Head)->size = 0; +} + +void NaClJITMemoryManager::DestroyFreeList(FreeListNode *Head) { + FreeListNode *n = Head->Next; + while(n != Head) { + FreeListNode *next = n->Next; + delete n; + n = next; + } + delete Head; +} + +FreeListNode *NaClJITMemoryManager::FreeListAllocate(uintptr_t &ActualSize, + FreeListNode *Head, + FreeListNode * (NaClJITMemoryManager::*allocate)(size_t)) { + FreeListNode *candidateBlock = Head; + FreeListNode *iter = Head->Next; + + uintptr_t largest = candidateBlock->size; + // Search for the largest free block + while (iter != Head) { + if (iter->size > largest) { + largest = iter->size; + candidateBlock = iter; + } + iter = iter->Next; + } + + if (largest < ActualSize || largest == 0) { + candidateBlock = (this->*allocate)(ActualSize); + } else { + candidateBlock->RemoveFromFreeList(); + } + return candidateBlock; +} + +void NaClJITMemoryManager::FreeListFinishAllocation(FreeListNode *Block, + FreeListNode *Head, uint8_t *AllocationStart, uint8_t *AllocationEnd, + AllocationTable &Table) { + assert(AllocationEnd > AllocationStart); + assert(Block->address == AllocationStart); + uint8_t *End = (uint8_t *)RoundUpToAlignment((uint64_t)AllocationEnd, + kBundleSize); + assert(End <= Block->address + Block->size); + int AllocationSize = End - Block->address; + Table[AllocationStart] = AllocationSize; + + Block->size -= AllocationSize; + if (Block->size >= kBundleSize * 2) {//TODO(dschuff): better heuristic? 
+ Block->address = End; + Block->AddToFreeList(Head); + } else { + delete Block; + } + DEBUG(dbgs()<<"FinishAllocation size "<< AllocationSize <<" end "<<End<<"\n"); +} + +void NaClJITMemoryManager::FreeListDeallocate(FreeListNode *Head, + AllocationTable &Table, + void *Body) { + uint8_t *Allocation = (uint8_t *)Body; + DEBUG(dbgs() << "deallocating "<<Body<<" "); + assert(Table.count(Allocation) && "FreeList Deallocation not found in table"); + FreeListNode *Block = new FreeListNode; + Block->address = Allocation; + Block->size = Table[Allocation]; + Block->AddToFreeList(Head); + DEBUG(dbgs() << "deallocated "<< Allocation<< " size " << Block->size <<"\n"); +} + +uint8_t *NaClJITMemoryManager::startFunctionBody(const Function *F, + uintptr_t &ActualSize) { + CurrentCodeBlock = FreeListAllocate(ActualSize, CodeFreeListHead, + &NaClJITMemoryManager::allocateCodeSlab); + DEBUG(dbgs() << "startFunctionBody CurrentBlock " << CurrentCodeBlock << + " addr " << CurrentCodeBlock->address << "\n"); + ActualSize = CurrentCodeBlock->size; + return CurrentCodeBlock->address; +} + +void NaClJITMemoryManager::endFunctionBody(const Function *F, + uint8_t *FunctionStart, + uint8_t *FunctionEnd) { + DEBUG(dbgs() << "endFunctionBody "); + FreeListFinishAllocation(CurrentCodeBlock, CodeFreeListHead, + FunctionStart, FunctionEnd, AllocatedFunctions); + +} + +uint8_t *NaClJITMemoryManager::allocateCodeSection(uintptr_t Size, + unsigned Alignment, + unsigned SectionID) { + llvm_unreachable("Implement me! (or don't.)"); +} + +uint8_t *NaClJITMemoryManager::allocateDataSection(uintptr_t Size, + unsigned Alignment, + unsigned SectionID) { + return (uint8_t *)DataAllocator.Allocate(Size, Alignment); +} + +void NaClJITMemoryManager::deallocateFunctionBody(void *Body) { + DEBUG(dbgs() << "deallocateFunctionBody, "); + if (Body) FreeListDeallocate(CodeFreeListHead, AllocatedFunctions, Body); +} + +uint8_t *NaClJITMemoryManager::allocateStub(const GlobalValue* F, + unsigned StubSize, + unsigned Alignment) { + uint8_t *StartAddress = (uint8_t *)(uintptr_t) + RoundUpToAlignment((uintptr_t)CurrentStubSlab.next_free, Alignment); + if (StartAddress + StubSize > + CurrentStubSlab.address + CurrentStubSlab.size) { + CurrentStubSlab = allocateStubSlab(kStubSlabSize); + StartAddress = (uint8_t *)(uintptr_t) + RoundUpToAlignment((uintptr_t)CurrentStubSlab.next_free, Alignment); + } + CurrentStubSlab.next_free = StartAddress + StubSize; + DEBUG(dbgs() <<"allocated stub "<<StartAddress<< " size "<<StubSize<<"\n"); + return StartAddress; +} + +uint8_t *NaClJITMemoryManager::allocateSpace(intptr_t Size, + unsigned Alignment) { + uint8_t *r = (uint8_t*)DataAllocator.Allocate(Size, Alignment); + DEBUG(dbgs() << "allocateSpace " << Size <<"/"<<Alignment<<" ret "<<r<<"\n"); + return r; +} + +uint8_t *NaClJITMemoryManager::allocateGlobal(uintptr_t Size, + unsigned Alignment) { + uint8_t *r = (uint8_t*)DataAllocator.Allocate(Size, Alignment); + DEBUG(dbgs() << "allocateGlobal " << Size <<"/"<<Alignment<<" ret "<<r<<"\n"); + return r; +} + +uint8_t* NaClJITMemoryManager::startExceptionTable(const Function* F, + uintptr_t &ActualSize) { + CurrentDataBlock = FreeListAllocate(ActualSize, DataFreeListHead, + &NaClJITMemoryManager::allocateDataSlab); + DEBUG(dbgs() << "startExceptionTable CurrentBlock " << CurrentDataBlock << + " addr " << CurrentDataBlock->address << "\n"); + ActualSize = CurrentDataBlock->size; + return CurrentDataBlock->address; +} + +void NaClJITMemoryManager::endExceptionTable(const Function *F, + uint8_t *TableStart, + 
uint8_t *TableEnd, uint8_t* FrameRegister) { + DEBUG(dbgs() << "endExceptionTable "); + FreeListFinishAllocation(CurrentDataBlock, DataFreeListHead, + TableStart, TableEnd, AllocatedTables); +} + +void NaClJITMemoryManager::deallocateExceptionTable(void *ET) { + DEBUG(dbgs() << "deallocateExceptionTable, "); + if (ET) FreeListDeallocate(DataFreeListHead, AllocatedTables, ET); +} + +// Copy of DefaultJITMemoryManager's implementation +void NaClJITMemoryManager::AllocateGOT() { + assert(GOTBase == 0 && "Cannot allocate the got multiple times"); + GOTBase = new uint8_t[sizeof(void*) * 8192]; + HasGOT = true; +} + +//===----------------------------------------------------------------------===// +// getPointerToNamedFunction() implementation. +// This code is pasted directly from r153607 of JITMemoryManager.cpp and has +// never been tested. It most likely doesn't work inside the sandbox. +//===----------------------------------------------------------------------===// + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector<void (*)()> AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +// +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. + */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... 
+ return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + +//===----------------------------------------------------------------------===// +// +/// getPointerToNamedFunction - This method returns the address of the specified +/// function by using the dynamic loader interface. As such it is only useful +/// for resolving library symbols, not code generated symbols. +/// +void *NaClJITMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. + if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinal. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. 
+ std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt index e22b8cd406..f7f814b9cb 100644 --- a/lib/LLVMBuild.txt +++ b/lib/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore +subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore Wrap [component_0] type = Group diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp index c16d1958cd..c5656a54c9 100644 --- a/lib/Linker/LinkArchives.cpp +++ b/lib/Linker/LinkArchives.cpp @@ -16,10 +16,24 @@ #include "llvm/Module.h" #include "llvm/ADT/SetOperations.h" #include "llvm/Bitcode/Archive.h" + +#include "llvm/Support/CommandLine.h" // @LOCALMOD + #include <memory> #include <set> using namespace llvm; +// @LOCALMOD-START +// NOTE: this has a similar effect as +// tools/llvm/llvm-preserve.ll +// which in turn is similar to the GNUS's attribute((used)) +// TODO(robertm): This is a little hackish for now +static cl::list<std::string> +UndefList("referenced-list", cl::value_desc("list"), + cl::desc("A list of symbols assumed to be referenced externally"), + cl::CommaSeparated); +// @LOCALMOD-END + /// GetAllUndefinedSymbols - calculates the set of undefined symbols that still /// exist in an LLVM module. This is a bit tricky because there may be two /// symbols with the same name but different LLVM types that will be resolved to @@ -36,7 +50,10 @@ static void GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) { std::set<std::string> DefinedSymbols; UndefinedSymbols.clear(); - + // @LOCALMOD-START + UndefinedSymbols.insert(UndefList.begin(), UndefList.end()); + // @LOCALMOD-END + // If the program doesn't define a main, try pulling one in from a .a file. // This is needed for programs where the main function is defined in an // archive, such f2c'd programs. diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index a6599bfe4f..b3426fb19f 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -933,6 +933,19 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { ValueMap[I] = DI; } + // @LOCALMOD-BEGIN + // Local patch for http://llvm.org/bugs/show_bug.cgi?id=11112 + // and http://llvm.org/bugs/show_bug.cgi?id=10887 + // Create an identity mapping for instructions so that alloca instructions + // do not get dropped and related debug info isn't lost. E.g., prevent + // call @llvm.dbg.declare(metadata !{i32 * %local_var}, ...) + // from becoming + // call @llvm.dbg.declare(null, ...) + for (Function::iterator BB = Src->begin(), BE = Src->end(); BB != BE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + ValueMap[I] = I; + // @LOCALMOD-END + if (Mode == Linker::DestroySource) { // Splice the body of the source function into the dest function. 
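The -referenced-list option introduced in the LinkArchives.cpp hunk above is a comma-separated cl::list whose entries are added to the undefined-symbol set, so archive members defining those names are pulled in even when nothing in the module references them. A self-contained sketch of how such an option behaves; the option string matches the patch, while the surrounding main() and the RefList name are invented:

  #include "llvm/Support/CommandLine.h"
  #include <set>
  #include <string>

  static llvm::cl::list<std::string>
  RefList("referenced-list", llvm::cl::CommaSeparated,
          llvm::cl::value_desc("list"),
          llvm::cl::desc("Symbols assumed to be referenced externally"));

  int main(int argc, char **argv) {
    llvm::cl::ParseCommandLineOptions(argc, argv);
    // e.g. -referenced-list=init,fini yields two entries; the linker then
    // treats both names as undefined so their definitions are not dropped.
    std::set<std::string> Undefined(RefList.begin(), RefList.end());
    return static_cast<int>(Undefined.size());
  }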
Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList()); @@ -950,6 +963,13 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { SmallVector<ReturnInst*, 8> Returns; // Ignore returns. CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap); } + + // @LOCALMOD-BEGIN + // There is no need for the identity mapping anymore. + for (Function::iterator BB = Src->begin(), BE = Src->end(); BB != BE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + ValueMap.erase(I); + // @LOCALMOD-END // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index eda062376e..a94d51bb74 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1568,4 +1568,5 @@ MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS, bool IsLittleEndian) { return new ELFObjectWriter(MOTW, OS, IsLittleEndian); + } diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 7ea0f3b85a..e0a83453df 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -24,6 +24,7 @@ using namespace llvm; MCAsmInfo::MCAsmInfo() { PointerSize = 4; + StackSlotSize = 4; // @LOCALMOD IsLittleEndian = true; StackGrowsUp = false; HasSubsectionsViaSymbols = false; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 17a6323d0e..16d1fff8a6 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -205,6 +205,13 @@ public: virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0); + // @LOCALMOD-BEGIN + virtual void EmitBundleLock(); + virtual void EmitBundleUnlock(); + virtual void EmitBundleAlignStart(); + virtual void EmitBundleAlignEnd(); + // @LOCALMOD-END + virtual void EmitFileDirective(StringRef Filename); virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename); @@ -783,6 +790,27 @@ bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, return false; } +// @LOCALMOD-BEGIN +void MCAsmStreamer::EmitBundleLock() { + OS << "\t.bundle_lock"; + EmitEOL(); +} + +void MCAsmStreamer::EmitBundleUnlock() { + OS << "\t.bundle_unlock"; + EmitEOL(); +} + +void MCAsmStreamer::EmitBundleAlignStart() { + OS << "\t.bundle_align_start"; + EmitEOL(); +} + +void MCAsmStreamer::EmitBundleAlignEnd() { + OS << "\t.bundle_align_end"; + EmitEOL(); +} +// @LOCALMOD-END void MCAsmStreamer::EmitFileDirective(StringRef Filename) { assert(MAI.hasSingleParameterDotFile()); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 726ec5aba5..b6c3bb20b5 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" // @LOCALMOD #include "llvm/Support/raw_ostream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/LEB128.h" @@ -71,6 +72,26 @@ bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const { } void MCAsmLayout::Invalidate(MCFragment *F) { + // @LOCALMOD-BEGIN + if (F->getParent()->isBundlingEnabled()) { + // If this fragment is part of a bundle locked group, + // we need to invalidate all the way to the first fragment + // in the group. 
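The loop that follows walks back from the changed fragment to the first fragment of its bundle-locked group, then steps back one fragment further so the group's padding and offset are recomputed. The same walk restated on a toy fragment type, purely as an illustration (Frag and FindInvalidationPoint are invented names):

  struct Frag {
    Frag *Prev;
    bool GroupStart;  // corresponds to isBundleGroupStart() above
  };

  // Returns the fragment from which layout must be recomputed, or null when
  // the group starts the section (the caller then invalidates the section).
  Frag *FindInvalidationPoint(Frag *F) {
    while (F && !F->GroupStart)
      F = F->Prev;
    return F ? F->Prev : nullptr;
  }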
+ while (F && !F->isBundleGroupStart()) + F = F->getPrevNode(); + assert(F); + // With padding enabled, we need to invalidate back one + // fragment further in in order to force the recalculuation + // of the padding and offset. + if (F->getPrevNode()) { + F = F->getPrevNode(); + } else { + LastValidFragment[F->getParent()] = NULL; + return; + } + } + // @LOCALMOD-END + // If this fragment wasn't already up-to-date, we don't need to do anything. if (!isFragmentUpToDate(F)) return; @@ -133,6 +154,15 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!"); return getFragmentOffset(SD->getFragment()) + SD->getOffset(); } + +// @LOCALMOD-BEGIN +uint8_t MCAsmLayout::getFragmentPadding(const MCFragment *F) const { + EnsureValid(F); + assert(F->BundlePadding != (uint8_t)~UINT8_C(0) && "Padding not set!"); + return F->BundlePadding; +} +// @LOCALMOD-END + uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { // The size is the last fragment's end offset. @@ -158,10 +188,32 @@ MCFragment::~MCFragment() { } MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) - : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)) + : Kind(_Kind), + // @LOCALMOD-BEGIN + BundleAlign(BundleAlignNone), + BundleGroupStart(false), + BundleGroupEnd(false), + BundlePadding(~UINT8_C(0)), + // @LOCALMOD-END + Parent(_Parent), Atom(0), Offset(~UINT64_C(0)) { if (Parent) Parent->getFragmentList().push_back(this); + + // @LOCALMOD-BEGIN + if (Parent && Parent->isBundlingEnabled()) { + BundleAlign = Parent->getBundleAlignNext(); + Parent->setBundleAlignNext(MCFragment::BundleAlignNone); + if (Parent->isBundleLocked()) { + BundleGroupStart = Parent->isBundleGroupFirstFrag(); + BundleGroupEnd = false; + Parent->setBundleGroupFirstFrag(false); + } else { + BundleGroupStart = true; + BundleGroupEnd = true; + } + } + // @LOCALMOD-END } /* *** */ @@ -172,12 +224,91 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) : Section(&_Section), Ordinal(~UINT32_C(0)), Alignment(1), - HasInstructions(false) + HasInstructions(false), +// @LOCALMOD-BEGIN + BundlingEnabled(false), + BundleLocked(false), + BundleGroupFirstFrag(false), + BundleAlignNext(MCFragment::BundleAlignNone), + BundleOffsetKnown(false), + BundleOffset(0) +// @LOCALMOD-END { if (A) A->getSectionList().push_back(this); + + // @LOCALMOD-BEGIN + BundleSize = A->getBackend().getBundleSize(); + if (BundleSize && _Section.UseCodeAlign()) { + BundlingEnabled = true; + setAlignment(BundleSize); + } + // @LOCALMOD-END +} + +// @LOCALMOD-BEGIN +void MCSectionData::MarkBundleOffsetUnknown() { + BundleOffsetKnown = false; + BundleOffset = 0; } +// Only create a new fragment if: +// 1) we are emitting the first instruction of a bundle locked sequence. +// 2) we are not currently emitting a bundle locked sequence and we cannot +// guarantee the instruction would not span a bundle boundary. +// Otherwise, append to the current fragment to reduce the number of fragments. +bool MCSectionData::ShouldCreateNewFragment(size_t Size) { + // The first instruction of a bundle locked region starts a new fragment. + if (isBundleLocked() && isBundleGroupFirstFrag()) + return true; + // Unless we know the relative offset of the end of the current fragment, + // we need to create a new fragment. 
+ if (!isBundleLocked() && !BundleOffsetKnown) + return true; + assert(BundleSize != 0 && "BundleSize needs to be non-zero"); + assert(Size < BundleSize && "Instruction size must be less than BundleSize"); + // If inserting the instruction would overlap a bundle boundary, start a + // new fragment. + // TODO(sehr): we could still explicitly insert a NOP and continue here. + if (BundleOffset + (unsigned) Size > BundleSize) + return true; + return false; +} + +void MCSectionData::UpdateBundleOffset(size_t Size) { + // A bundle locked fragment could move if it spans a bundle boundary. + if (isBundleLocked()) { + BundleOffsetKnown = false; + return; + } + // If inserting the instruction would overlap a bundle boundary, starting a + // new fragment moves the known offset to the end of the instruction in the + // next bundle. + // TODO(sehr): we could insert a NOP and continue the fragment. + if (BundleOffset + (unsigned) Size > BundleSize) + BundleOffset = Size; + else + BundleOffset = BundleOffset + Size; +} + +void MCSectionData::AlignBundleOffsetTo(size_t AlignBase) { + // If BundleOffset is already known, an alignment just moves bundleOffset. + if (BundleOffsetKnown) { + BundleOffset = RoundUpToAlignment(BundleOffset, AlignBase); + return; + } + // Otherwise, if AlignBase is at least as big as a bundle, then we know the + // offset relative to a bundle start. + if (AlignBase >= BundleSize) { + BundleOffsetKnown = true; + BundleOffset = 0; + } else { + BundleOffsetKnown = false; + BundleOffset = 0; + } +} +// @LOCALMOD-END + /* *** */ MCSymbolData::MCSymbolData() : Symbol(0) {} @@ -319,7 +450,10 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_LEB: return cast<MCLEBFragment>(F).getContents().size(); - +// @LOCALMOD-BEGIN + case MCFragment::FT_Tiny: + return cast<MCTinyFragment>(F).getContents().size(); +// @LOCALMOD-END case MCFragment::FT_Align: { const MCAlignFragment &AF = cast<MCAlignFragment>(F); unsigned Offset = Layout.getFragmentOffset(&AF); @@ -375,15 +509,145 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) { uint64_t Offset = 0; if (Prev) Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev); - + // @LOCALMOD-BEGIN + F->BundlePadding = getAssembler().ComputeBundlePadding(*this, F, Offset); + Offset += F->BundlePadding; + // @LOCALMOD-END F->Offset = Offset; LastValidFragment[F->getParent()] = F; } +// @LOCALMOD-BEGIN +// Returns number of bytes of padding needed to align to bundle start. 
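ShouldCreateNewFragment and UpdateBundleOffset above track the current offset within a bundle so that fixup-free instructions can keep sharing one fragment until the next one would straddle a bundle boundary. A toy restatement of the unlocked-path bookkeeping, with invented class and method names (the bundle-locked case and AlignBundleOffsetTo are left out):

  #include <cstddef>

  class BundleOffsetTracker {
    size_t BundleSize;
    size_t Offset;
    bool Known;
  public:
    explicit BundleOffsetTracker(size_t Size)
        : BundleSize(Size), Offset(0), Known(true) {}
    void markUnknown() { Known = false; Offset = 0; }
    // True if an instruction of Size bytes has to start a fresh fragment.
    bool needsNewFragment(size_t Size) const {
      if (!Known)
        return true;
      return Offset + Size > BundleSize;  // would straddle a bundle boundary
    }
    // Advance past an instruction that was just emitted.
    void append(size_t Size) {
      if (!Known)
        return;
      Offset = (Offset + Size > BundleSize) ? Size : Offset + Size;
    }
  };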
+static uint64_t AddressToBundlePadding(uint64_t Address, uint64_t BundleMask) { + return (~Address + 1) & BundleMask; +} + +uint64_t MCAssembler::getBundleSize() const { + return getBackend().getBundleSize(); +} + +uint64_t MCAssembler::getBundleMask() const { + uint64_t BundleSize = getBundleSize(); + uint64_t BundleMask = BundleSize - 1; + assert(BundleSize != 0); + assert((BundleSize & BundleMask) == 0 && + "Bundle size must be a power of 2!"); + return BundleMask; +} + +static unsigned ComputeGroupSize(MCFragment *F) { + if (!F->isBundleGroupStart()) { + return 0; + } + + unsigned GroupSize = 0; + MCFragment *Cur = F; + while (Cur) { + switch (Cur->getKind()) { + default: llvm_unreachable("Unexpected fragment type in bundle!"); + case MCFragment::FT_Align: + case MCFragment::FT_Org: + case MCFragment::FT_Fill: + if (Cur == F && Cur->isBundleGroupEnd()) { + return 0; + } + llvm_unreachable(".bundle_lock cannot contain .align, .org, or .fill"); + case MCFragment::FT_Inst: + GroupSize += cast<MCInstFragment>(Cur)->getInstSize(); + break; + case MCFragment::FT_Data: + GroupSize += cast<MCDataFragment>(Cur)->getContents().size(); + break; + case MCFragment::FT_Tiny: + GroupSize += cast<MCTinyFragment>(Cur)->getContents().size(); + break; + } + if (Cur->isBundleGroupEnd()) + break; + Cur = Cur->getNextNode(); + } + return GroupSize; +} + +uint8_t MCAssembler::ComputeBundlePadding(const MCAsmLayout &Layout, + MCFragment *F, + uint64_t FragmentOffset) const { + if (!F->getParent()->isBundlingEnabled()) + return 0; + + uint64_t BundleSize = getBundleSize(); + uint64_t BundleMask = getBundleMask(); + unsigned GroupSize = ComputeGroupSize(F); + + if (GroupSize > BundleSize) { + // EmitFill creates large groups consisting of repeated single bytes. + // These should be safe at any alignment, and in any case we cannot + // fix them up here. + return 0; + } + + uint64_t Padding = 0; + uint64_t OffsetInBundle = FragmentOffset & BundleMask; + + if (OffsetInBundle + GroupSize > BundleSize || + F->getBundleAlign() == MCFragment::BundleAlignStart) { + // If this group would cross the bundle boundary, or this group must be + // aligned to the start of a bundle, then pad up to start of the next bundle + Padding += AddressToBundlePadding(OffsetInBundle, BundleMask); + OffsetInBundle = 0; + } + if (F->getBundleAlign() == MCFragment::BundleAlignEnd) { + // Push to the end of the bundle + Padding += AddressToBundlePadding(OffsetInBundle + GroupSize, BundleMask); + } + return Padding; +} +// @LOCALMOD-END + + + + +// @LOCALMOD-BEGIN +// Write out BundlePadding bytes in NOPs, being careful not to cross a bundle +// boundary. +static void WriteBundlePadding(const MCAssembler &Asm, + const MCAsmLayout &Layout, + uint64_t Offset, uint64_t TotalPadding, + MCObjectWriter *OW) { + uint64_t BundleSize = Asm.getBundleSize(); + uint64_t BundleMask = Asm.getBundleMask(); + uint64_t PaddingLeft = TotalPadding; + uint64_t StartPos = Offset; + + bool FirstWrite = true; + while (PaddingLeft > 0) { + uint64_t NopsToWrite = + FirstWrite ? AddressToBundlePadding(StartPos, BundleMask) : + BundleSize; + if (NopsToWrite > PaddingLeft) + NopsToWrite = PaddingLeft; + if (!Asm.getBackend().writeNopData(NopsToWrite, OW)) + report_fatal_error("unable to write nop sequence of " + + Twine(NopsToWrite) + " bytes"); + PaddingLeft -= NopsToWrite; + FirstWrite = false; + } +} +// @LOCALMOD-END + /// WriteFragmentData - Write the \p F data to the output file. 
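ComputeBundlePadding above reduces to a small piece of modular arithmetic. A standalone restatement with a worked example, covering only the do-not-straddle rule and not the .bundle_align_start/.bundle_align_end cases (PaddingForGroup is an invented name):

  #include <cassert>
  #include <cstdint>

  // Bytes needed to advance Address to the next bundle boundary
  // (0 if it is already aligned). BundleMask == BundleSize - 1.
  uint64_t AddressToBundlePadding(uint64_t Address, uint64_t BundleMask) {
    return (~Address + 1) & BundleMask;
  }

  // Padding inserted before a locked group of GroupSize bytes that would
  // otherwise start at OffsetInSection, for a power-of-two BundleSize.
  uint64_t PaddingForGroup(uint64_t OffsetInSection, uint64_t GroupSize,
                           uint64_t BundleSize) {
    assert((BundleSize & (BundleSize - 1)) == 0 && GroupSize <= BundleSize);
    uint64_t Mask = BundleSize - 1;
    uint64_t OffsetInBundle = OffsetInSection & Mask;
    if (OffsetInBundle + GroupSize > BundleSize)  // group would straddle
      return AddressToBundlePadding(OffsetInBundle, Mask);
    return 0;
  }

  // Example: a 12-byte locked group at bundle offset 0x1c with 32-byte
  // bundles gets 4 bytes of padding (0x1c -> 0x20), so it then sits entirely
  // inside the next bundle.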
static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment &F) { MCObjectWriter *OW = &Asm.getWriter(); + // @LOCALMOD-BEGIN + if (F.getParent()->isBundlingEnabled()) { + uint64_t BundlePadding = Layout.getFragmentPadding(&F); + uint64_t PaddingOffset = Layout.getFragmentOffset(&F) - BundlePadding; + WriteBundlePadding(Asm, Layout, PaddingOffset, BundlePadding, OW); + } + // @LOCALMOD-END + uint64_t Start = OW->getStream().tell(); (void) Start; @@ -412,6 +676,16 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, // bytes left to fill use the Value and ValueSize to fill the rest. // If we are aligning with nops, ask that target to emit the right data. if (AF.hasEmitNops()) { + // @LOCALMOD-BEGIN + if (Asm.getBundleSize()) { + WriteBundlePadding(Asm, Layout, + Layout.getFragmentOffset(&F), + FragmentSize, + OW); + break; + } + // @LOCALMOD-END + if (!Asm.getBackend().writeNopData(Count, OW)) report_fatal_error("unable to write nop sequence of " + Twine(Count) + " bytes"); @@ -438,6 +712,15 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout, break; } + // @LOCALMOD-BEGIN + case MCFragment::FT_Tiny: { + MCTinyFragment &TF = cast<MCTinyFragment>(F); + assert(FragmentSize == TF.getContents().size() && "Invalid size!"); + OW->WriteBytes(TF.getContents().str()); + break; + } + // @LOCALMOD-END + case MCFragment::FT_Fill: { MCFillFragment &FF = cast<MCFillFragment>(F); @@ -843,10 +1126,24 @@ void MCFragment::dump() { case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; + // @LOCALMOD-BEGIN + case MCFragment::FT_Tiny: OS << "MCTinyFragment"; break; + // @LOCALMOD-END } OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder - << " Offset:" << Offset << ">"; + << " Offset:" << Offset; + // @LOCALMOD-BEGIN + if (BundleGroupStart) + OS << " BundleGroupStart"; + if (BundleGroupEnd) + OS << " BundleGroupEnd"; + if (BundleAlign == BundleAlignStart) + OS << " BundleAlign: Start"; + else if (BundleAlign == BundleAlignEnd) + OS << " BundleAlign: End"; + OS << ">"; + // @LOCALMOD-END switch (getKind()) { case MCFragment::FT_Align: { @@ -895,6 +1192,20 @@ void MCFragment::dump() { IF->getInst().dump_pretty(OS); break; } + // @LOCALMOD-BEGIN + case MCFragment::FT_Tiny: { + const MCTinyFragment *TF = cast<MCTinyFragment>(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl<char> &Contents = TF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + break; + } + // @LOCALMOD-END case MCFragment::FT_Org: { const MCOrgFragment *OF = cast<MCOrgFragment>(this); OS << "\n "; diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index f71b266ad6..a1643b2da5 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -777,7 +777,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, static int getDataAlignmentFactor(MCStreamer &streamer) { MCContext &context = streamer.getContext(); const MCAsmInfo &asmInfo = context.getAsmInfo(); - int size = asmInfo.getPointerSize(); + int size = asmInfo.getStackSlotSize(); // @LOCALMOD if (asmInfo.isStackGrowthDirectionUp()) return size; else diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 14fbc1ec83..b1bded288d 
100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -355,6 +355,7 @@ void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { fixSymbolsInTLSFixups(Value); MCObjectStreamer::EmitValueImpl(Value, Size, AddrSpace); + getCurrentSectionData()->MarkBundleOffsetUnknown(); // @LOCALMOD } @@ -423,10 +424,10 @@ void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) { for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i) fixSymbolsInTLSFixups(F.getFixups()[i].getValue()); + getCurrentSectionData()->MarkBundleOffsetUnknown(); // @LOCALMOD } void MCELFStreamer::EmitInstToData(const MCInst &Inst) { - MCDataFragment *DF = getOrCreateDataFragment(); SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; @@ -437,12 +438,26 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { for (unsigned i = 0, e = Fixups.size(); i != e; ++i) fixSymbolsInTLSFixups(Fixups[i].getValue()); - // Add the fixups and data. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); - DF->addFixup(Fixups[i]); + // @LOCALMOD-BEGIN + MCSectionData *SD = getCurrentSectionData(); + + if (Fixups.size() > 0 || !SD->isBundlingEnabled()) { + MCDataFragment *DF = getOrCreateDataFragment(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); + } else { + MCTinyFragment *TF = dyn_cast_or_null<MCTinyFragment>(getCurrentFragment()); + if (!TF || SD->ShouldCreateNewFragment(Code.size())) + TF = new MCTinyFragment(SD); + TF->getContents().append(Code.begin(), Code.end()); } - DF->getContents().append(Code.begin(), Code.end()); + SD->UpdateBundleOffset(Code.size()); + // @LOCALMOD-END } void MCELFStreamer::FinishImpl() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 4c17d91551..46579d7b1f 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -83,6 +83,13 @@ namespace { virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) { return false; } + // @LOCALMOD-BEGIN + virtual void EmitBundleLock() {} + virtual void EmitBundleUnlock() {} + virtual void EmitBundleAlignStart() {} + virtual void EmitBundleAlignEnd() {} + // @LOCALMOD-END + virtual void EmitFileDirective(StringRef Filename) {} virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename) { diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 2e1604d6b5..3338a17e5c 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -543,9 +543,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { SectionKind::getDataRel()); } +// @LOCALMOD-START +// TODO(petarj): HACK! Find a better way to set ELF::EF_MIPS_PIC flag. +// See also file lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp. 
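In the MCELFStreamer::EmitInstToData change above, instruction bytes that carry fixups (or are emitted with bundling disabled) still go into an MCDataFragment, while fixup-free bytes are appended to the current MCTinyFragment, or to a fresh one when the current fragment is not tiny or the section says a new fragment is needed. The decision distilled into a standalone function; ChooseFragment and its parameter names are invented:

  enum class FragmentChoice { Data, ExistingTiny, NewTiny };

  FragmentChoice ChooseFragment(bool BundlingEnabled, bool HasFixups,
                                bool CurrentIsTiny, bool NeedsOwnFragment) {
    if (HasFixups || !BundlingEnabled)
      return FragmentChoice::Data;        // fixups always use a data fragment
    if (!CurrentIsTiny || NeedsOwnFragment)
      return FragmentChoice::NewTiny;     // start a fresh tiny fragment
    return FragmentChoice::ExistingTiny;  // keep appending to the current one
  }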
+Reloc::Model RelocModelOption = Reloc::Default; +// @LOCALMOD-END + void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, CodeModel::Model cm, MCContext &ctx) { + RelocModelOption = relocm; // @LOCALMOD RelocM = relocm; CMModel = cm; Ctx = &ctx; diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 774632306d..37a445fae0 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" // @LOCALMOD #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -54,6 +55,11 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const { } MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { + // @LOCALMOD-BEGIN + if (getCurrentSectionData()->isBundlingEnabled()) { + return new MCDataFragment(getCurrentSectionData()); + } + // @LOCALMOD-END MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); if (!F) F = new MCDataFragment(getCurrentSectionData()); @@ -153,6 +159,54 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, report_fatal_error("This file format doesn't support weak aliases."); } +// @LOCALMOD-BEGIN ======================================================== + +void MCObjectStreamer::EmitBundleAlignStart() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_align_start called, but bundling disabled!"); + assert(!SD->isBundleLocked() && + ".bundle_align_start while bundle locked"); + SD->setBundleAlignNext(MCFragment::BundleAlignStart); +} + +void MCObjectStreamer::EmitBundleAlignEnd() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_align_end called, but bundling disabled!"); + assert(!SD->isBundleLocked() && + ".bundle_align_end while bundle locked"); + SD->setBundleAlignNext(MCFragment::BundleAlignEnd); +} + +void MCObjectStreamer::EmitBundleLock() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_lock called, but bundling disabled!"); + assert(!SD->isBundleLocked() && + ".bundle_lock issued when bundle already locked"); + SD->setBundleLocked(true); + SD->setBundleGroupFirstFrag(true); +} + +void MCObjectStreamer::EmitBundleUnlock() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_unlock called, but bundling disabled!"); + assert(SD->isBundleLocked() && + ".bundle_unlock called when bundle not locked"); + // If there has been at least one fragment emitted inside + // this bundle lock, then we need to mark the last emitted + // fragment as the group end. + if (!SD->isBundleGroupFirstFrag()) { + assert(getCurrentFragment() != NULL); + getCurrentFragment()->setBundleGroupEnd(true); + } + SD->setBundleLocked(false); + SD->setBundleGroupFirstFrag(false); +} +// @LOCALMOD-END ========================================================== + void MCObjectStreamer::ChangeSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); @@ -160,6 +214,13 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section) { } void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { + + // @LOCALMOD-BEGIN + if (getAssembler().getBackend().CustomExpandInst(Inst, *this)) { + return; + } + // @LOCALMOD-END + // Scan for values. 
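EmitBundleLock and EmitBundleUnlock above bracket a group of fragments that layout then keeps inside a single bundle. A hedged sketch of how a caller holding an MCObjectStreamer might use the pair, assuming the typical NaCl pattern of keeping a masking instruction and the branch it guards together; the helper and its arguments are invented, only the Emit* calls come from the patch:

  #include "llvm/MC/MCInst.h"
  #include "llvm/MC/MCObjectStreamer.h"

  void EmitLockedPair(llvm::MCObjectStreamer &OS,
                      const llvm::MCInst &Mask, const llvm::MCInst &Branch) {
    OS.EmitBundleLock();        // the next fragment starts a locked group
    OS.EmitInstruction(Mask);
    OS.EmitInstruction(Branch);
    OS.EmitBundleUnlock();      // the last fragment emitted closes the group
  }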
for (unsigned i = Inst.getNumOperands(); i--; ) if (Inst.getOperand(i).isExpr()) @@ -235,6 +296,7 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { assert(AddrSpace == 0 && "Address space must be 0!"); getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); + getCurrentSectionData()->MarkBundleOffsetUnknown(); // @LOCALMOD } void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -246,6 +308,10 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment, new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, getCurrentSectionData()); + // @LOCALMOD-BEGIN + // Bump the bundle offset to account for alignment. + getCurrentSectionData()->AlignBundleOffsetTo(ByteAlignment); + // @LOCALMOD-END // Update the maximum alignment on the current section if necessary. if (ByteAlignment > getCurrentSectionData()->getAlignment()) getCurrentSectionData()->setAlignment(ByteAlignment); @@ -301,6 +367,7 @@ void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, // FIXME: A MCFillFragment would be more memory efficient but MCExpr has // problems evaluating expressions across multiple fragments. getOrCreateDataFragment()->getContents().append(NumBytes, FillValue); + getCurrentSectionData()->MarkBundleOffsetUnknown(); } void MCObjectStreamer::FinishImpl() { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 6f2e85e553..cf86a44d1b 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -295,6 +295,13 @@ private: // ".align{,32}", ".p2align{,w,l}" bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + // @LOCALMOD-BEGIN + bool ParseDirectiveBundleLock(); + bool ParseDirectiveBundleUnlock(); + bool ParseDirectiveBundleAlignStart(); + bool ParseDirectiveBundleAlignEnd(); + // @LOCALMOD-END + /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which /// accepts a single symbol (which should be a label or an external). 
bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr); @@ -1283,6 +1290,17 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { if (IDVal == ".p2alignl") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + // @LOCALMOD-BEGIN + if (IDVal == ".bundle_lock") + return ParseDirectiveBundleLock(); + if (IDVal == ".bundle_unlock") + return ParseDirectiveBundleUnlock(); + if (IDVal == ".bundle_align_start") + return ParseDirectiveBundleAlignStart(); + if (IDVal == ".bundle_align_end") + return ParseDirectiveBundleAlignEnd(); + // @LOCALMOD-END + if (IDVal == ".org") return ParseDirectiveOrg(); @@ -2404,6 +2422,50 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { return false; } +// @LOCALMOD-BEGIN +bool AsmParser::ParseDirectiveBundleLock() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_lock' directive"); + Lex(); + getStreamer().EmitBundleLock(); + return false; +} + +bool AsmParser::ParseDirectiveBundleUnlock() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_unlock' directive"); + Lex(); + getStreamer().EmitBundleUnlock(); + return false; +} + +bool AsmParser::ParseDirectiveBundleAlignStart() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_align_start' directive"); + Lex(); + getStreamer().EmitBundleAlignStart(); + return false; +} + +bool AsmParser::ParseDirectiveBundleAlignEnd() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_align_end' directive"); + Lex(); + getStreamer().EmitBundleAlignEnd(); + return false; +} + +// @LOCALMOD-END + + /// ParseDirectiveSymbolAttribute /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 7625abd465..7f902f1dd7 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -370,5 +370,11 @@ void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) { AddFeature("64bit"); AddFeature("altivec"); } +// @LOCALMOD-BEGIN + } else if (Triple.getArch() == Triple::arm && + Triple.getOS() == Triple::NativeClient) { + AddFeature("-neon"); + AddFeature("+vfp2"); +// @LOCALMOD-END } } diff --git a/lib/Makefile b/lib/Makefile index fd575cd195..c59d77d009 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,12 @@ LEVEL = .. include $(LEVEL)/Makefile.config PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \ - Target ExecutionEngine Linker MC Object DebugInfo + Target ExecutionEngine Linker MC Object Wrap DebugInfo + +ifeq ($(NACL_SANDBOX),1) + PARALLEL_DIRS := $(filter-out Archive Linker, \ + $(PARALLEL_DIRS)) +endif include $(LEVEL)/Makefile.common diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index e175056279..508bec4028 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -267,6 +267,7 @@ void CrashRecoveryContext::Enable() { gCrashRecoveryEnabled = true; +#if !defined(__native_client__) // Setup the signal handler. 
struct sigaction Handler; Handler.sa_handler = CrashRecoverySignalHandler; @@ -276,6 +277,9 @@ void CrashRecoveryContext::Enable() { for (unsigned i = 0; i != NumSignals; ++i) { sigaction(Signals[i], &Handler, &PrevActions[i]); } +#else +#warning Cannot setup the signal handler on this machine +#endif } void CrashRecoveryContext::Disable() { @@ -286,9 +290,11 @@ void CrashRecoveryContext::Disable() { gCrashRecoveryEnabled = false; +#if !defined(__native_client__) // Restore the previous signal handlers. for (unsigned i = 0; i != NumSignals; ++i) sigaction(Signals[i], &PrevActions[i], 0); +#endif } #endif diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 45fec361c1..d8884381ab 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -187,3 +187,4 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { } #endif // LLVM_ON_WIN32 + diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 59bfcfcd25..7610d281f0 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -19,7 +19,7 @@ #include <unistd.h> #endif using namespace llvm; - +#ifndef __native_client__ /// \brief Attempt to read the lock file with the given name, if it exists. /// /// \param LockFileName The name of the lock file to read. @@ -214,3 +214,5 @@ void LockFileManager::waitForUnlock() { // Give up. } + +#endif diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index ec373e7f99..0423c7acb3 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -264,7 +264,7 @@ error_code MemoryBuffer::getFile(const char *Filename, static bool shouldUseMmap(int FD, size_t FileSize, size_t MapSize, - off_t Offset, + int64_t Offset, bool RequiresNullTerminator, int PageSize) { // We don't use mmap for small files because this can severely fragment our @@ -275,7 +275,6 @@ static bool shouldUseMmap(int FD, if (!RequiresNullTerminator) return true; - // If we don't know the file size, use fstat to find out. fstat on an open // file descriptor is cheaper than stat on a random path. 
// FIXME: this chunk of code is duplicated, but it avoids a fstat when @@ -335,8 +334,8 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, PageSize)) { - off_t RealMapOffset = Offset & ~(PageSize - 1); - off_t Delta = Offset - RealMapOffset; + int64_t RealMapOffset = Offset & ~(PageSize - 1); + int64_t Delta = Offset - RealMapOffset; size_t RealMapSize = MapSize + Delta; if (const char *Pages = sys::Path::MapInFilePages(FD, diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index 4e4a026b2f..586392fc1e 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -60,7 +60,7 @@ MutexImpl::MutexImpl( bool recursive) assert(errorcode == 0); #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \ - !defined(__DragonFly__) && !defined(__Bitrig__) + !defined(__DragonFly__) && !defined(__Bitrig__) && !defined(__native_client__) // Make it a process local mutex errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); assert(errorcode == 0); diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index 726e2fbcf0..aa06763258 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -19,7 +19,9 @@ #include "llvm/Config/config.h" #include "llvm/ADT/StringRef.h" #include "Unix.h" +#if !defined(__native_client__) #include <sys/utsname.h> +#endif // (__native_client__) #include <cctype> #include <string> #include <cstdlib> // ::getenv @@ -27,12 +29,16 @@ using namespace llvm; static std::string getOSVersion() { +#if !defined(__native_client__) struct utsname info; if (uname(&info)) return ""; return info.release; +#else // (__native_client__) + return ""; +#endif // (__native_client__) } std::string sys::getDefaultTargetTriple() { diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 9a8abd27f1..f4cfbc65cf 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// #include "Unix.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Process.h" +#include "llvm/Support/Debug.h" #ifdef HAVE_SYS_MMAN_H #include <sys/mman.h> @@ -145,8 +147,12 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { return error_code(EINVAL, generic_category()); int Protect = getPosixProtectionFlags(Flags); - +#ifndef __native_client__ int Result = ::mprotect(M.Address, M.Size, Protect); +#else + int Result = -1; + llvm_unreachable("Native client does not support mprotect"); +#endif if (Result != 0) return error_code(errno, system_category()); @@ -194,8 +200,10 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC, flags, fd, 0); #else +dbgs() << "calling mmap, start " << start << "\n"; void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC, flags, fd, 0); + DEBUG(dbgs() << "mmap returned " << pa<<"\n"); #endif if (pa == MAP_FAILED) { if (NearBlock) //Try again without a near hint diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 6a5ebb8cd9..b82371a7b6 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -133,7 +133,9 @@ Path::GetRootDirectory() { Path Path::GetTemporaryDirectory(std::string *ErrMsg) { -#if defined(HAVE_MKDTEMP) +#if defined(__native_client__) + return Path(""); 
+#elif defined(HAVE_MKDTEMP) // The best way is with mkdtemp but that's not available on many systems, // Linux and FreeBSD have it. Others probably won't. char pathname[] = "/tmp/llvm_XXXXXX"; @@ -251,6 +253,7 @@ Path::GetUserHomeDirectory() { Path Path::GetCurrentDirectory() { +#if !defined(__native_client__) char pathname[MAXPATHLEN]; if (!getcwd(pathname, MAXPATHLEN)) { assert(false && "Could not query current working directory."); @@ -258,6 +261,9 @@ Path::GetCurrentDirectory() { } return Path(pathname); +#else // (__native_client__) + return Path("./"); +#endif // (__native_client__) } #if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ @@ -319,7 +325,9 @@ getprogpath(char ret[PATH_MAX], const char *bin) /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { -#if defined(__APPLE__) +#if defined(__native_client__) + return Path(std::string("./") + std::string(argv0)); +#elif defined(__APPLE__) // On OS X the executable path is saved to the stack by dyld. Reading it // from there is much faster than calling dladdr, especially for large // binaries with symbols. @@ -420,7 +428,11 @@ bool Path::getMagicNumber(std::string &Magic, unsigned len) const { bool Path::exists() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), F_OK ); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool @@ -433,21 +445,33 @@ Path::isDirectory() const { bool Path::isSymLink() const { +#if defined(__native_client__) + return false; +#else struct stat buf; if (0 != lstat(path.c_str(), &buf)) return false; return S_ISLNK(buf.st_mode); +#endif } bool Path::canRead() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), R_OK); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool Path::canWrite() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), W_OK); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool @@ -466,6 +490,7 @@ Path::isRegularFile() const { bool Path::canExecute() const { +#if !defined(__native_client__) if (0 != access(path.c_str(), R_OK | X_OK )) return false; struct stat buf; @@ -473,6 +498,7 @@ Path::canExecute() const { return false; if (!S_ISREG(buf.st_mode)) return false; +#endif // (__native_client__) return true; } @@ -520,6 +546,7 @@ PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { } static bool AddPermissionBits(const Path &File, int bits) { +#if !defined(__native_client__) // Get the umask value from the operating system. We want to use it // when changing the file's permissions. Since calling umask() sets // the umask and returns its old value, we must call it a second @@ -535,6 +562,7 @@ static bool AddPermissionBits(const Path &File, int bits) { // that the umask would not disable. 
if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1) return false; +#endif // (__native_client__) return true; } @@ -558,6 +586,7 @@ bool Path::makeExecutableOnDisk(std::string* ErrMsg) { bool Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { +#if !defined(__native_client__) DIR* direntries = ::opendir(path.c_str()); if (direntries == 0) return MakeErrMsg(ErrMsg, path + ": can't open directory"); @@ -583,6 +612,7 @@ Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { } closedir(direntries); +#endif return false; } @@ -635,7 +665,7 @@ Path::eraseSuffix() { } static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { - +#if !defined(__native_client__) if (access(beg, R_OK | W_OK) == 0) return false; @@ -660,6 +690,9 @@ static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { } return mkdir(beg, S_IRWXU | S_IRWXG) != 0; +#else // (__native_client__) + return false; +#endif // (__native_client__) } bool @@ -683,11 +716,13 @@ Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) { bool Path::createFileOnDisk(std::string* ErrMsg) { +#if !defined(__native_client__) // Create the file int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR); if (fd < 0) return MakeErrMsg(ErrMsg, path + ": can't create file"); ::close(fd); +#endif // (__native_client__) return false; } @@ -707,6 +742,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { bool Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { +#if !defined(__native_client__) // Get the status so we can determine if it's a file or directory. struct stat buf; if (0 != stat(path.c_str(), &buf)) { @@ -751,18 +787,26 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { if (rmdir(pathname.c_str()) != 0) return MakeErrMsg(ErrStr, pathname + ": can't erase directory"); return false; +#else // (__native_client__) + MakeErrMsg(ErrStr, ": PNACL does not know how to erase directories!"); + return false; +#endif // (__native_client__) + } bool Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { +#if !defined(__native_client__) if (0 != ::rename(path.c_str(), newName.c_str())) return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" + newName.str() + "'"); +#endif return false; } bool Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { +#if !defined(__native_client__) struct utimbuf utb; utb.actime = si.modTime.toPosixTime(); utb.modtime = utb.actime; @@ -770,6 +814,7 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { return MakeErrMsg(ErrStr, path + ": can't set file modification time"); if (0 != ::chmod(path.c_str(),si.mode)) return MakeErrMsg(ErrStr, path + ": can't set mode"); +#endif // (__native_client__) return false; } diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index d04f590f87..59c5ae5808 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -117,7 +117,9 @@ error_code current_path(SmallVectorImpl<char> &result) { // For GNU Hurd result.reserve(1024); #endif - +#ifdef __native_client__ + llvm_unreachable("current_path() not implemented for Native Client"); +#else while (true) { if (::getcwd(result.data(), result.capacity()) == 0) { // See if there was a real error. 
@@ -130,6 +132,7 @@ error_code current_path(SmallVectorImpl<char> &result) { } result.set_size(strlen(result.data())); +#endif return error_code::success(); } @@ -193,6 +196,9 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) { } error_code create_directory(const Twine &path, bool &existed) { +#ifdef __native_client__ + llvm_unreachable("create_directory() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -204,9 +210,13 @@ error_code create_directory(const Twine &path, bool &existed) { existed = false; return error_code::success(); +#endif } error_code create_hard_link(const Twine &to, const Twine &from) { +#ifdef __native_client__ + llvm_unreachable("create_hard_link() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -217,9 +227,13 @@ error_code create_hard_link(const Twine &to, const Twine &from) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code create_symlink(const Twine &to, const Twine &from) { +#ifdef __native_client__ + llvm_unreachable("create_symlink() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -230,9 +244,13 @@ error_code create_symlink(const Twine &to, const Twine &from) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code remove(const Twine &path, bool &existed) { +#ifdef __native_client__ + llvm_unreachable("remove() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -242,11 +260,14 @@ error_code remove(const Twine &path, bool &existed) { existed = false; } else existed = true; - return error_code::success(); +#endif } error_code rename(const Twine &from, const Twine &to) { +#ifdef __native_client__ + llvm_unreachable("rename() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -266,9 +287,13 @@ error_code rename(const Twine &from, const Twine &to) { } return error_code::success(); +#endif } error_code resize_file(const Twine &path, uint64_t size) { +#ifdef __native_client__ + llvm_unreachable("resize_file() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -276,6 +301,7 @@ error_code resize_file(const Twine &path, uint64_t size) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code exists(const Twine &path, bool &result) { @@ -390,6 +416,9 @@ error_code permissions(const Twine &path, perms prms) { error_code unique_file(const Twine &model, int &result_fd, SmallVectorImpl<char> &result_path, bool makeAbsolute, unsigned mode) { +#ifdef __native_client__ + llvm_unreachable("unique_file() not implemented for Native Client"); +#else SmallString<128> Model; model.toVector(Model); // Null terminate. @@ -463,9 +492,14 @@ rety_open_create: result_fd = RandomFD; return error_code::success(); +#endif } error_code mapped_file_region::init(int fd, uint64_t offset) { +#ifdef __native_client__ + // Newlib does not have ftruncate. + llvm_unreachable("mapped_file_region not implemented for native client"); +#else AutoFD FD(fd); // Figure out how large the file is. 
@@ -491,6 +525,7 @@ error_code mapped_file_region::init(int fd, uint64_t offset) { if (Mapping == MAP_FAILED) return error_code(errno, system_category()); return error_code::success(); +#endif // __native_client__ } mapped_file_region::mapped_file_region(const Twine &path, @@ -501,6 +536,9 @@ mapped_file_region::mapped_file_region(const Twine &path, : Mode(mode) , Size(length) , Mapping() { +#ifdef __native_client__ + llvm_unreachable("mapped_file_region not implemented for native client"); +#endif // Make sure that the requested size fits within SIZE_T. if (length > std::numeric_limits<size_t>::max()) { ec = make_error_code(errc::invalid_argument); @@ -529,6 +567,9 @@ mapped_file_region::mapped_file_region(int fd, : Mode(mode) , Size(length) , Mapping() { +#ifdef __native_client__ + llvm_unreachable("mapped_file_region not implemented for native client"); +#endif // Make sure that the requested size fits within SIZE_T. if (length > std::numeric_limits<size_t>::max()) { ec = make_error_code(errc::invalid_argument); diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 5204147ce3..b2983b21f7 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -36,6 +36,8 @@ # include <termios.h> #endif +#include <sys/unistd.h> + //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only generic UNIX code that //=== is guaranteed to work on *all* UNIX variants. @@ -54,9 +56,10 @@ Process::GetPageSize() const int page_size = 0x1000; #elif defined(HAVE_GETPAGESIZE) const int page_size = ::getpagesize(); -#elif defined(HAVE_SYSCONF) +#elif defined(HAVE_SYSCONF) && !defined(__native_client__) long page_size = ::sysconf(_SC_PAGE_SIZE); #else + const int page_size = 0; #warning Cannot get the page size on this machine #endif return static_cast<unsigned>(page_size); @@ -111,7 +114,7 @@ Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time) { elapsed = TimeValue::now(); -#if defined(HAVE_GETRUSAGE) +#if defined(HAVE_GETRUSAGE) && !defined(__native_client__) struct rusage usage; ::getrusage(RUSAGE_SELF, &usage); user_time = TimeValue( @@ -132,11 +135,23 @@ Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time, } int Process::GetCurrentUserId() { +#if !defined(__native_client__) return getuid(); +#else // (__native_client__) +// TODO(abetul): What the proper return value should be for this function? +// What about having a reserved user_id or the user "nobody" for PNACL? + return -1; +#endif // (__native_client__) } int Process::GetCurrentGroupId() { +#if !defined(__native_client__) return getgid(); +#else // (__native_client__) +// TODO(abetul): What the proper return value should be for this function? +// What about having a reserved/unused group_id? 
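// The shape of the Process.inc stubs above, pulled out for reference: the POSIX
// query stays on the non-sandboxed path, and the sandboxed path falls back to -1,
// an id no real user or group holds. Minimal sketch that mirrors the hunk rather
// than adding new behavior:
#include <unistd.h>

static int CurrentUserIdSketch() {
#if !defined(__native_client__)
  return ::getuid();   // ordinary POSIX path
#else
  return -1;           // sandbox: no meaningful uid; -1 stands in for "none"
#endif
}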
+ return -1; +#endif // (__native_client__) } #if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) @@ -332,3 +347,6 @@ unsigned llvm::sys::Process::GetRandomNumber() { return ::rand(); #endif } + +#if !defined(__native_client__) +#endif diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index e5990d06ec..049c41b742 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -103,6 +103,10 @@ Program::FindProgramByName(const std::string& progName) { } static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { +#if defined(__native_client__) + MakeErrMsg(ErrMsg, "Cannot redirect I/O in NaCl"); + return true; +#else // (__native_client__) if (Path == 0) // Noop return false; const char *File; @@ -119,7 +123,6 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { + (FD == 0 ? "input" : "output")); return true; } - // Install it as the requested FD if (dup2(InFD, FD) == -1) { MakeErrMsg(ErrMsg, "Cannot dup2"); @@ -128,6 +131,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { } close(InFD); // Close the original FD return false; +#endif // (__native_client__) } #ifdef HAVE_POSIX_SPAWN @@ -233,6 +237,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, } #endif +#if !defined(__native_client__) // Create a child process. int child = fork(); switch (child) { @@ -293,6 +298,10 @@ Program::Execute(const Path &path, const char **args, const char **envp, Data_ = reinterpret_cast<void*>(child); return true; +#else // (__native_client__) + MakeErrMsg(ErrMsg, "PNACL does not know how to execute child processes!"); + return false; +#endif // (__native_client__) } int @@ -300,6 +309,7 @@ Program::Wait(const sys::Path &path, unsigned secondsToWait, std::string* ErrMsg) { +#if !defined(__native_client__) #ifdef HAVE_SYS_WAIT_H struct sigaction Act, Old; @@ -392,10 +402,16 @@ Program::Wait(const sys::Path &path, *ErrMsg = "Program::Wait is not implemented on this platform yet!"; return -1; #endif +#else // (__native_client__) +// TODO(abetul): What should the proper return value be here? + MakeErrMsg(ErrMsg, "PNACL does not know how to wait for a child process!"); + return -1; +#endif // (__native_client__) } bool Program::Kill(std::string* ErrMsg) { +#if !defined(__native_client__) if (Data_ == 0) { MakeErrMsg(ErrMsg, "Process not started!"); return true; @@ -410,6 +426,12 @@ Program::Kill(std::string* ErrMsg) { } return false; + +#else // (__native_client__) + MakeErrMsg(ErrMsg, "PNACL does not know how to kill processes!"); + return true; +#endif // (__native_client__) + } error_code Program::ChangeStdinToBinary(){ diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 9e94068c9c..264fa5dbde 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -82,6 +82,7 @@ static struct { static void RegisterHandler(int Signal) { +#if !defined(__native_client__) assert(NumRegisteredSignals < sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) && "Out of space for signal handlers!"); @@ -97,6 +98,7 @@ static void RegisterHandler(int Signal) { &RegisteredSignalInfo[NumRegisteredSignals].SA); RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal; ++NumRegisteredSignals; +#endif // (__native_client__) } static void RegisterHandlers() { @@ -108,11 +110,13 @@ static void RegisterHandlers() { } static void UnregisterHandlers() { +#if !defined(__native_client__) // Restore all of the signal handlers to how they were before we showed up. 
for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i) sigaction(RegisteredSignalInfo[i].SigNo, &RegisteredSignalInfo[i].SA, 0); NumRegisteredSignals = 0; +#endif // (__native_client__) } @@ -155,10 +159,12 @@ static RETSIGTYPE SignalHandler(int Sig) { // instead of recursing in the signal handler. UnregisterHandlers(); +#if !defined(__native_client__) // Unmask all potentially blocked kill signals. sigset_t SigMask; sigfillset(&SigMask); sigprocmask(SIG_UNBLOCK, &SigMask, 0); +#endif SignalsMutex.acquire(); RemoveFilesToRemove(); diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc index 5cf5a9d44e..0eb4ac8ad3 100644 --- a/lib/Support/Unix/TimeValue.inc +++ b/lib/Support/Unix/TimeValue.inc @@ -18,6 +18,13 @@ #include "Unix.h" +// @LOCALMOD-START +#ifndef timerclear +// Newlib does not have the timer{clear,add,sub} macros +#define timerclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) +#endif +// @LOCALMOD-END + namespace llvm { using namespace sys; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 1446bbbb8e..0ac92f1ee8 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -20,6 +20,9 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" +// @LOCALMOD (for LowerARMMachineInstrToMCInstPCRel) +#include "llvm/MC/MCSymbol.h" + namespace llvm { class ARMAsmPrinter; @@ -44,9 +47,27 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +/* @LOCALMOD-START */ +FunctionPass *createARMNaClRewritePass(); +/* @LOCALMOD-END */ + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); + +/* @LOCALMOD-START */ +// Used to lower the pc-relative MOVi16PIC / MOVTi16PIC pseudo instructions +// into the real MOVi16 / MOVTi16 instructions. +// See comment on MOVi16PIC for more details. +void LowerARMMachineInstrToMCInstPCRel(const MachineInstr *MI, + MCInst &OutMI, + ARMAsmPrinter &AP, + unsigned ImmIndex, + unsigned PCIndex, + MCSymbol *PCLabel, + unsigned PCAdjustment); +/* @LOCALMOD-END */ + } // end namespace llvm; #endif diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 23974ad905..5c56b2dc47 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -220,8 +220,13 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, // V7a Processors. def : ProcessorModel<"cortex-a8", CortexA8Model, - [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, +// @LOCALMOD-BEGIN +// TODO(pdox): Resolve this mismatch. + [ProcA8, HasV7Ops, FeatureDB, +// FeatureNEON, FeatureDSPThumb2, FeatureHasRAS]>; +// @LOCALMOD-END + def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index d439d1d7cb..f67decc550 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -52,6 +52,13 @@ #include <cctype> using namespace llvm; +// @LOCALMOD-START +namespace llvm { + extern cl::opt<bool> FlagSfiBranch; + extern cl::opt<bool> FlagSfiData; +} +// @LOCALMOD-END + namespace { // Per section and per symbol attributes are not supported. 
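// FlagSfiBranch and FlagSfiData are only declared extern in the asm-printer hunk
// above; the defining side lives elsewhere in the NaCl-local ARM code. A minimal
// sketch of what such a definition looks like; the option strings, defaults and
// descriptions here are assumptions for illustration, not copied from the tree:
#include "llvm/Support/CommandLine.h"

namespace llvm {
cl::opt<bool> FlagSfiBranch("sfi-branch", cl::init(true),
    cl::desc("Align call and branch targets to NaCl bundle boundaries"));
cl::opt<bool> FlagSfiData("sfi-data", cl::init(true),
    cl::desc("Guard data embedded in the text section for NaCl SFI"));
}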
@@ -223,6 +230,75 @@ getDebugValueLocation(const MachineInstr *MI) const { return Location; } +// @LOCALMOD-START +// Make sure all jump targets are aligned and also all constant pools +void NaclAlignAllJumpTargetsAndConstantPools(MachineFunction &MF) { + // JUMP TABLE TARGETS + MachineJumpTableInfo *jt_info = MF.getJumpTableInfo(); + if (jt_info) { + const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables(); + for (unsigned i=0; i < JT.size(); ++i) { + std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs; + + for (unsigned j=0; j < MBBs.size(); ++j) { + if (MBBs[j]->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + continue; + } + MBBs[j]->setAlignment(4); + } + } + } + + // FIRST ENTRY IN A ConstanPool + bool last_bb_was_constant_pool = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + if (I->isLandingPad()) { + I->setAlignment(4); + } + + if (I->empty()) continue; + + bool is_constant_pool = I->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY; + + if (last_bb_was_constant_pool != is_constant_pool) { + I->setAlignment(4); + } + + last_bb_was_constant_pool = is_constant_pool; + } +} + +bool ARMAsmPrinter::UseReadOnlyJumpTables() const { + if (Subtarget->isTargetNaCl()) + return true; + return false; +} + +unsigned ARMAsmPrinter::GetTargetBasicBlockAlign() const { + if (Subtarget->isTargetNaCl()) + return 4; + return 0; +} + +unsigned ARMAsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 4; + } + } + return 0; +} +// @LOCALMOD-END + /// EmitDwarfRegOp - Emit dwarf register operation. void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); @@ -299,6 +375,17 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitThumbFunc(CurrentFnSym); } + // @LOCALMOD-START + // make sure function entry is aligned. We use XmagicX as our basis + // for alignment decisions (c.f. 
assembler sfi macros) + int alignment = MF->getAlignment(); + if (alignment < 4) alignment = 4; + EmitAlignment(alignment); + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(StringRef("\t.set XmagicX, .\n")); + } + // @LOCALMOD-END + OutStreamer.EmitLabel(CurrentFnSym); } @@ -325,6 +412,11 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); MCP = MF.getConstantPool(); + // @LOCALMOD-START + if (FlagSfiBranch) { + NaclAlignAllJumpTargetsAndConstantPools(MF); + } + // @LOCALMOD-END return AsmPrinter::runOnMachineFunction(MF); } @@ -360,10 +452,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || - (TF & ARMII::MO_LO16)) + (TF == ARMII::MO_LO16)) // @LOCALMOD: TEMPORARY FIX O << ":lower16:"; else if ((Modifier && strcmp(Modifier, "hi16") == 0) || - (TF & ARMII::MO_HI16)) + (TF == ARMII::MO_HI16)) // @LOCALMOD: TEMPORARY FIX O << ":upper16:"; O << *Mang->getSymbol(GV); @@ -389,6 +481,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, //===--------------------------------------------------------------------===// + MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; @@ -570,6 +663,8 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +void EmitSFIHeaders(raw_ostream &O); + void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { Reloc::Model RelocM = TM.getRelocationModel(); @@ -629,8 +724,16 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Emit ARM Build Attributes if (Subtarget->isTargetELF()) emitAttributes(); -} + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + std::string str; + raw_string_ostream OS(str); + EmitSFIHeaders(OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + // @LOCALMOD-END +} void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { @@ -700,6 +803,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { } } + //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -965,7 +1069,20 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext); } Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext); + } else { // @LOCALMOD-BEGIN + // Check mustAddCurrentAddress() when getPCAdjustment() == 0, + // and make it actually *Subtract* the current address. + // A more appropriate name is probably "relativeToCurrentAddress", + // since the assembler can't actually handle "X + .", only "X - .". 
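// Self-contained sketch of the "X - ." construction described above; the patch's
// own version appears in the lines that follow. Emitting a temporary label pins
// down ".", and the subtraction yields an expression the assembler can fold:
MCSymbol *Dot = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(Dot);                                // "." is now Dot
const MCExpr *DotRef = MCSymbolRefExpr::Create(Dot, OutContext);
Expr = MCBinaryExpr::CreateSub(Expr, DotRef, OutContext);  // X - .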
+ if (ACPV->mustAddCurrentAddress()) { + MCSymbol *DotSym = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(DotSym); + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + Expr = MCBinaryExpr::CreateSub(Expr, DotExpr, OutContext); + } } + // @LOCALMOD-END + OutStreamer.EmitValue(Expr, Size); } @@ -1595,6 +1712,28 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { InConstantPool = true; } + + // @LOCALMOD-START + // NOTE: we also should make sure that the first data item + // is not in a code bundle + // NOTE: there may be issues with alignment constraints + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + const unsigned size = MI->getOperand(2).getImm(); + //assert(size == 4 || size == 8 && "Unsupported data item size"); + if (size == 8) { + // we cannot generate a size 8 constant at offset 12 (mod 16) + OutStreamer.EmitRawText(StringRef("sfi_nop_if_at_bundle_end\n")); + } + + if (FlagSfiData) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "sfi_illegal_if_at_bundle_begining @ ========== SFI (" << + size << ")\n"; + OutStreamer.EmitRawText(OS.str()); + } + } + // @LOCALMOD-END OutStreamer.EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; @@ -1725,8 +1864,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. if (!Subtarget->isTargetDarwin()) { - //.long 0xe7ffdefe @ trap - uint32_t Val = 0xe7ffdefeUL; + // @LOCALMOD-START + //.long 0xe7fedef0 @ trap + uint32_t Val = 0xe7fedef0UL; + // @LOCALMOD-END OutStreamer.AddComment("trap"); OutStreamer.EmitIntValue(Val, 4); return; @@ -2023,6 +2164,50 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } + + // @LOCALMOD-BEGIN + // These are pseudo ops for MOVW / MOVT with operands relative to a PC label. + // See the comments on MOVi16PIC in the .td file for more details. + case ARM::MOVi16PIC: { + MCInst TmpInst; + // First, build an instruction w/ the real opcode. + TmpInst.setOpcode(ARM::MOVi16); + + unsigned ImmIndex = 1; + unsigned PIC_id_index = 2; + unsigned PCAdjustment = 8; + // NOTE: if getPICLabel was a method of "this", or otherwise in scope for + // LowerARMMachineInstrToMCInstPCRel, then we wouldn't need to create + // it here (as well as below). + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(PIC_id_index).getImm(), + OutContext); + LowerARMMachineInstrToMCInstPCRel(MI, TmpInst, *this, ImmIndex, + PIC_id_index, PCLabel, PCAdjustment); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVTi16PIC: { + MCInst TmpInst; + // First, build an instruction w/ the real opcode. 
+ TmpInst.setOpcode(ARM::MOVTi16); + + unsigned ImmIndex = 2; + unsigned PIC_id_index = 3; + unsigned PCAdjustment = 8; + + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(PIC_id_index).getImm(), + OutContext); + + LowerARMMachineInstrToMCInstPCRel(MI, TmpInst, *this, ImmIndex, + PIC_id_index, PCLabel, PCAdjustment); + OutStreamer.EmitInstruction(TmpInst); + return; + } + //@LOCALMOD-END } MCInst TmpInst; diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index c875b2cbdf..ee3604499f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -72,9 +72,16 @@ public: virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; + // @LOCALMOD-START + // usually this does nothing on ARM as constants pools + // are handled with custom code. + // For the sfi case we do not use the custom logic and fall back + // to the default implementation. virtual void EmitConstantPool() LLVM_OVERRIDE { - // we emit constant pools customly! + if (FlagSfiDisableCP) AsmPrinter::EmitConstantPool(); } + // @LOCALMOD-END + virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE; virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE; virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE; @@ -83,6 +90,17 @@ public: // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + + // @LOCALMOD-START + /// UseReadOnlyJumpTables - true if JumpTableInfo must be in rodata. + virtual bool UseReadOnlyJumpTables() const; + /// GetTargetBasicBlockAlign - Get the target alignment for basic blocks. + virtual unsigned GetTargetBasicBlockAlign() const; + /// GetTargetLabelAlign - Get optional alignment for TargetOpcode + /// labels E.g., EH_LABEL. + /// TODO(sehr,robertm): remove this if the labeled block has address taken. + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const; + // @LOCALMOD-END private: // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3c7bb24f42..5280abb40c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1783,6 +1783,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, // Build the new ADD / SUB. unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill).addImm(ThisVal) .addImm((unsigned)Pred).addReg(PredReg).addReg(0) @@ -2250,6 +2251,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc. 
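// Units reminder for the alignment hooks added to ARMAsmPrinter above: values
// such as setAlignment(4) and GetTargetBasicBlockAlign() == 4 are log2 amounts,
// so 4 means 2^4 = 16 bytes, the ARM NaCl bundle size. A tiny sketch of that
// relationship (assumed constants, for illustration only):
static const unsigned NaClBundleAlignLog2 = 4;
static const unsigned NaClBundleSizeBytes = 1u << NaClBundleAlignLog2;  // 16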
for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++) OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second); + return true; } } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e5b300fc77..8f5be6a120 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -17,6 +17,7 @@ #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" +#include "ARMTargetMachine.h" // @LOCALMOD #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -60,8 +61,10 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, BasePtr(ARM::R6) { } +extern cl::opt<bool> ReserveR9; // @LOCALMOD const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (ReserveR9) return CSR_NaCl_SaveList; // @LOCALMOD bool ghcCall = false; if (MF) { @@ -80,6 +83,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { + if (ReserveR9) return CSR_NaCl_RegMask; // @LOCALMOD return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } @@ -581,6 +585,13 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + // @LOCALMOD-START + // In the sfi case we do not want to use the load const pseudo instr. + // Sadly, the ARM backend is not very consistent about using this + // pseudo instr. and hence checking this is not sufficient. + // But, it should help detect some regressions early. + assert(!FlagSfiDisableCP && "unexpected call to emitLoadConstPool"); + // @LOCALMOD-END MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b378b96626..a7544cd4cd 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -105,6 +105,10 @@ def CC_ARM_APCS_GHC : CallingConv<[ def CC_ARM_AAPCS_Common : CallingConv<[ + // @LOCALMOD-BEGIN (PR11018) + CCIfByVal<CCPassByVal<4, 4>>, + // @LOCALMOD-END + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, // i64/f64 is passed in even pairs of GPRs @@ -204,3 +208,9 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; // add is a workaround for not being able to compile empty list: // def CSR_GHC : CalleeSavedRegs<()>; def CSR_GHC : CalleeSavedRegs<(add)>; + +// @LOCALMOD-START +// NaCl does not save R9, but otherwise uses the same order as AAPCS +def CSR_NaCl : CalleeSavedRegs<(add LR, R11, R10, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8))>; +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index a57368fdb5..be19a20182 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -374,6 +374,7 @@ FunctionPass *llvm::createARMConstantIslandPass() { } bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { + if (FlagSfiDisableCP) return false; // @LOCALMOD MF = &mf; MCP = mf.getConstantPool(); diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index ae531c4ea8..24f2fcb666 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -81,6 +81,9 @@ public: 
bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; } bool isLSDA() const { return Kind == ARMCP::CPLSDA; } bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } + // @LOCALMOD-START + bool isValue() const { return Kind == ARMCP::CPValue; } + // @LOCALMOD-END virtual unsigned getRelocationInfo() const { return 2; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 8c45e0b98d..348f234f5c 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetOptions.h" // @LOCALMOD for llvm::TLSUseCall #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! @@ -43,6 +44,7 @@ namespace { const TargetRegisterInfo *TRI; const ARMSubtarget *STI; ARMFunctionInfo *AFI; + bool IsRelocPIC; // @LOCALMOD virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -63,6 +65,16 @@ namespace { unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); + // @LOCALMOD-BEGIN + void AddPICADD_MOVi16_PICID(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool NotThumb, + unsigned PredReg, ARMCC::CondCodes Pred, + unsigned DstReg, bool DstIsDead, + MachineInstrBuilder &LO16, + MachineInstrBuilder &HI16); + // @LOCALMOD-END }; char ARMExpandPseudo::ID = 0; } @@ -478,13 +490,46 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); - // Transfer memoperands. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MI.eraseFromParent(); } +// @LOCALMOD-BEGIN +// AddPICADD_MOVi16_PICID - Inserts a PICADD into the given basic block, +// and adds the PC label ID (of the PICADD) as an operand of the LO16 / HI16 +// MOVs. The ID operand will follow the "Immediate" operand (assumes that +// operand is already added). +void ARMExpandPseudo::AddPICADD_MOVi16_PICID(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool NotThumb, + unsigned PredReg, ARMCC::CondCodes Pred, + unsigned DstReg, bool DstIsDead, + MachineInstrBuilder &LO16, + MachineInstrBuilder &HI16) { + // Throw in a PICADD, and tack on the PC label ID to the MOVT/MOVWs + MachineFunction &MF = *MI.getParent()->getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Make a unique ID for this PC by pulling from pool of constPoolIDs + unsigned PC_ID = AFI->createPICLabelUId(); + MachineInstrBuilder PicADD = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NotThumb ? ARM::PICADD : ARM::tPICADD)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(PC_ID) + .addImm(Pred) + .addReg(PredReg); + (void)PicADD; // squelch unused warning. + + // Add the PC label ID after what would have been an absolute address. + LO16 = LO16.addImm(PC_ID); + HI16 = HI16.addImm(PC_ID); +} +// @LOCALMOD-END + /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ /// register operands to real instructions with D register operands. 
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { @@ -645,7 +690,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned LO16Opc = 0; unsigned HI16Opc = 0; - if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + // @LOCALMOD + bool isThumb2 = (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm); + if (isThumb2) { LO16Opc = ARM::t2MOVi16; HI16Opc = ARM::t2MOVTi16; } else { @@ -653,10 +700,28 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16Opc = ARM::MOVTi16; } + // @LOCALMOD-BEGIN + // If constant pools are "disabled" (actually, moved to rodata), then + // many addresses (e.g., the addresses of what used to be the "pools") + // may not be materialized in a pc-relative manner, because MOVT / MOVW + // are used to materialize the addresses. + // We need to know if it matters that references are pc-relative + // (e.g., to be PIC). + // See the comments on MOVi16PIC / MOVTi16PIC for more details. + const bool ShouldUseMOV16PIC = FlagSfiDisableCP && IsRelocPIC && + (MO.isCPI() || MO.isJTI() || MO.isGlobal()); // TODO check this list. + if (ShouldUseMOV16PIC) { + if (isThumb2) + llvm_unreachable("FIXME: add PIC versions of t2MOVi16"); + LO16Opc = ARM::MOVi16PIC; + HI16Opc = ARM::MOVTi16PIC; + } + // @LOCALMOD-END + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); + .addReg(DstReg, RegState::Kill); // @LOCALMOD if (MO.isImm()) { unsigned Imm = MO.getImm(); @@ -664,13 +729,31 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); - } else { + } else if (MO.isGlobal()) { // @LOCALMOD const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + // @LOCALMOD-START - support for jumptable addresses and CPI + } else if (MO.isCPI()) { + int i = MO.getIndex(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addConstantPoolIndex(i, MO.getOffset(), TF|ARMII::MO_LO16); + HI16 = HI16.addConstantPoolIndex(i, MO.getOffset(), TF|ARMII::MO_HI16); + } else if (MO.isJTI()){ + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addJumpTableIndex(MO.getIndex(), TF | ARMII::MO_LO16); + HI16 = HI16.addJumpTableIndex(MO.getIndex(), TF | ARMII::MO_HI16); + } else { + assert (0 && "unexpected operand"); + // @LOCALMOD-END } - + // @LOCALMOD-BEGIN + if (ShouldUseMOV16PIC) { + AddPICADD_MOVi16_PICID(MI, MBB, MBBI, !isThumb2, + PredReg, Pred, DstReg, DstIsDead, LO16, HI16); + } + // @LOCALMOD-END LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); @@ -848,13 +931,37 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::tTPsoft: case ARM::TPsoft: { + // @LOCALMOD-BEGIN + if (!STI->isTargetNaCl() || llvm::TLSUseCall) { + // Don't add implicit uses/defs for this call, otherwise + // liveness analysis passes get confused. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), + BuildMI_NoImp(MBB, MBBI, MI.getDebugLoc(), // @LOCALMOD TII->get(Opcode == ARM::tTPsoft ? 
ARM::tBL : ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - TransferImpOps(MI, MIB, MIB); + TransferImpOps(MI, MIB, MIB); + } else { + // Inline version for native client. + // See native_client/src/untrusted/nacl/aeabi_read_tp.S + // .nexe builds use this version, while irt builds use a call to + // __aeabi_read_tp. + if (FlagNaClUseM23ArmAbi) { + // mov r0, r9 + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVr), ARM::R0) + .addReg(ARM::R9)) + .addReg(0); // Doesn't use/modify CPSR. + } else { + // ldr r0, [r9, #0] + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::LDRi12), ARM::R0) + .addReg(ARM::R9) + .addImm(0)); + } + } + // @LOCALMOD-END MI.eraseFromParent(); return true; } @@ -1210,6 +1317,62 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; + + // @LOCALMOD-BEGIN + case ARM::ARMeh_return: { + // This pseudo instruction is generated as part of the lowering of + // ISD::EH_RETURN (c.f. ARMISelLowering.cpp) + // we convert it to a stack increment by OffsetReg and + // indirect jump to TargetReg + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned OffsetReg = MI.getOperand(0).getReg(); + unsigned TargetReg = MI.getOperand(1).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ADDrr), ARM::SP) + .addReg(OffsetReg) + .addReg(ARM::SP) + .addImm(Pred) + .addReg(PredReg) + .addReg(0); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BX)) + .addReg(TargetReg); + MI.eraseFromParent(); + return true; + } + case ARM::MOVGOTAddr : { + // Expand the pseudo-inst that requests for the GOT address + // to be materialized into a register. We use MOVW/MOVT for this. + // See ARMISelLowering.cpp for a comment on the strategy. 
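// The TPsoft expansion above replaces the __aeabi_read_tp call with one of two
// inline forms inside the sandbox. Their semantics, restated as a sketch (r9 is
// the reserved NaCl thread-pointer register on ARM):
static inline void *ReadTpDirect(void *r9)   { return r9; }  // mov r0, r9
static inline void *ReadTpIndirect(void *r9) {               // ldr r0, [r9, #0]
  return *reinterpret_cast<void **>(r9);
}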
+ unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVi16PIC), + DstReg) + .addExternalSymbol("_GLOBAL_OFFSET_TABLE_", ARMII::MO_LO16); + + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVTi16PIC)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addExternalSymbol("_GLOBAL_OFFSET_TABLE_", ARMII::MO_HI16); + + AddPICADD_MOVi16_PICID(MI, MBB, MBBI, true, + PredReg, Pred, DstReg, DstIsDead, LO16, HI16); + + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + return true; + } + // @LOCALMOD-END } } @@ -1232,6 +1395,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TRI = TM.getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); AFI = MF.getInfo<ARMFunctionInfo>(); + IsRelocPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6611862ca0..033540ae7d 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -716,6 +716,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { } unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { + // @LOCALMOD-START + // In the sfi case we do not want to use the ARM custom cp handling. + // This assert should help detect some regressions early. + assert(!FlagSfiDisableCP && "unexpected call to TargetMaterializeConstant"); + // @LOCALMOD-END EVT VT = TLI.getValueType(C->getType(), true); // Only handle simple types. diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 9392497fd0..c8ddbcfaec 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -26,6 +26,9 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" +// @LOCALMOD-START +#include "llvm/CodeGen/MachineModuleInfo.h" +// @LOCALMOD-END using namespace llvm; @@ -153,6 +156,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // @LOCALMOD-START + MachineModuleInfo &MMI = MF.getMMI(); + // This condition was gleaned from x86 / PowerPC / XCore + bool needsFrameMoves = MMI.hasDebugInfo() || + !MF.getFunction()->doesNotThrow() || + MF.getFunction()->needsUnwindTableEntry(); + // @LOCALMOD-END + // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) @@ -212,6 +223,42 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Move past area 1. if (GPRCS1Size > 0) MBBI++; + // @LOCALMOD-START + if (needsFrameMoves && GPRCS1Size > 0) { + // we just skipped the initial callee save reg instructions, e.g. 
+ // push {r4, r5, r6, lr} + // NOTE: this likely is not the right thing to do for darwin as it does not + // treat all callee save regs uniformly + MCSymbol *AfterRegSave = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(ARM::PROLOG_LABEL)).addSym(AfterRegSave); + // record the fact that the stack has moved + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(MachineLocation::VirtualFP, -GPRCS1Size); + MMI.getFrameMoves().push_back(MachineMove(AfterRegSave, dst, src)); + // for each callee saved register record where it has been saved + int offset = 0; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::LR: + offset -= 4; + MachineLocation dst(MachineLocation::VirtualFP, offset); + MachineLocation src(Reg); + MMI.getFrameMoves().push_back(MachineMove(AfterRegSave, dst, src)); + break; + } + } + } + // @LOCALMOD-END + // Set FP to point to the stack slot that contains the previous FP. // For iOS, FP is R7, which has now been stored in spill area 1. // Otherwise, if this is not iOS, all the callee-saved registers go @@ -225,8 +272,29 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); + // @LOCALMOD-START + if (needsFrameMoves) { + // we just emitted the fp pointer setup instruction, e.g. + // add r11, sp, #8 + MCSymbol *AfterFramePointerInit = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(ARM::PROLOG_LABEL)).addSym(AfterFramePointerInit); + // record the fact that the frame pointer is now tracking the "cfa" + // Note, gcc and llvm have a slightly different notion of where the + // frame pointer should be pointing. gcc points after the return address + // and llvm one word further down (two words = 8). + // This should be fine as long as we are consistent. + // NOTE: this is related to the offset computed for + // ISD::FRAME_TO_ARGS_OFFSET + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(FramePtr, 8); + MMI.getFrameMoves().push_back(MachineMove(AfterFramePointerInit, dst, src)); + } + // @LOCALMOD-END } + + // Move past area 2. if (GPRCS2Size > 0) MBBI++; @@ -275,6 +343,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // an inconsistent state (pointing to the middle of callee-saved area). // The interrupt handler can end up clobbering the registers. 
AFI->setShouldRestoreSPFromFP(true); + + // @LOCALMOD-START + // we only track sp changes if do not have the fp to figure out where + // stack frame lives + if (needsFrameMoves && !HasFP) { + MCSymbol *AfterStackUpdate = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(ARM::PROLOG_LABEL)).addSym(AfterStackUpdate); + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(MachineLocation::VirtualFP, - NumBytes - GPRCS1Size); + MMI.getFrameMoves().push_back(MachineMove(AfterStackUpdate, dst, src)); + } + // @LOCALMOD-END } if (STI.isTargetELF() && hasFP(MF)) @@ -670,7 +751,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps() && + false /* @LOCALMOD */) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index a1c2b93562..a2280db515 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -27,7 +27,8 @@ protected: public: explicit ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4, + 4), // @LOCALMOD STI(sti) { } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index efd6d2b839..90ae94b3b2 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -35,8 +35,17 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +// @LOCALMOD-START +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; +} +// @LOCALMOD-END + using namespace llvm; + static cl::opt<bool> DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), @@ -109,21 +118,24 @@ public: bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, + AddrMode2Type SelectAddrMode2Worker(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2Base(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; + return SelectAddrMode2Worker(Op, N, Base, Offset, Opc) == AM2_BASE; } - bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2ShOp(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP; + return SelectAddrMode2Worker(Op, N, Base, Offset, Opc) == AM2_SHOP; } - bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - SelectAddrMode2Worker(N, Base, Offset, Opc); + SelectAddrMode2Worker(Op, N, Base, Offset, Opc); // return SelectAddrMode2ShOp(N, Base, Offset, Opc); // This always matches one way or another. 
return true; @@ -136,7 +148,7 @@ public: bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrOffsetNone(SDValue N, SDValue &Base); - bool SelectAddrMode3(SDValue N, SDValue &Base, + bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); @@ -434,6 +446,22 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, return true; } +// @LOCALMOD-START +static bool ShouldOperandBeUnwrappedForUseAsBaseAddress( + SDValue& N, const ARMSubtarget* Subtarget) { + assert (N.getOpcode() == ARMISD::Wrapper); + // Never use this transformation if constant island pools are disallowed + if (FlagSfiDisableCP) return false; + + // always apply this when we do not have movt/movw available + // (if we do have movt/movw we be able to get rid of the + // constant pool entry altogether) + if (!Subtarget->useMovt()) return true; + // explain why we do not want to use this for TargetGlobalAddress + if (N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) return true; + return false; +} +// @LOCALMOD-END bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, @@ -452,8 +480,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } else Base = N; @@ -487,6 +515,11 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { + // @LOCALMOD-BEGIN + // Disallow offsets of Reg + Reg (which may escape sandbox). + if (Subtarget->isTargetNaCl()) + return false; + // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -582,10 +615,24 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, //----- -AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, +AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, +// @LOCALMOD-START +// Note: In the code below we do not want "Offset" to be real register to +// not violate ARM sandboxing. +// @LOCALMOD-END SDValue &Opc) { + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + // This is neither a sandboxable load nor a sandboxable store. + if (!restrict_addressing_modes_for_nacl) { + // @LOCALMOD-END + if (N.getOpcode() == ISD::MUL && (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -609,6 +656,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, } } } + } // @LOCALMOD if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && // ISD::OR that is equivalent to an ADD. 
@@ -618,8 +666,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -652,7 +700,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, return AM2_BASE; } } - + if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; @@ -662,6 +710,24 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, MVT::i32); return AM2_BASE; } + + // @LOCALMOD-START + // Keep load and store addressing modes simple + if (restrict_addressing_modes_for_nacl) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } + // @LOCALMOD-END // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; @@ -730,13 +796,27 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) return false; + // @LOCALMOD-BEGIN + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + // @LOCALMOD-END + + Offset = N; ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); unsigned ShAmt = 0; if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't fold // it. - if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + + //if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1)); + // @LOCALMOD-BEGIN + // Neither a sandboxable load nor a sandboxable store. + if (!restrict_addressing_modes_for_nacl && Sh ) { + // @LOCALMOD-END ShAmt = Sh->getZExtValue(); if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) Offset = N.getOperand(0); @@ -799,16 +879,25 @@ bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { return true; } -bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + if (!restrict_addressing_modes_for_nacl) { + // @LOCALMOD-END if (N.getOpcode() == ISD::SUB) { + // X - C is canonicalize to X + -C, no need to handle it here. 
Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); return true; } + } // @LOCALMOD-END if (!CurDAG->isBaseWithConstantOffset(N)) { Base = N; @@ -841,6 +930,16 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, return true; } + // @LOCALMOD-START + // A sandboxable load or a sandboxable store. + if (restrict_addressing_modes_for_nacl) { + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + // @LOCALMOD-END + Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); @@ -875,8 +974,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -2467,6 +2566,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. } + if (FlagSfiDisableCP) UseCP = false; // @LOCALMOD + if (UseCP) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ff99b04078..0893826427 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -48,6 +48,15 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" + +// @LOCALMOD-START +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -71,6 +80,7 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::init(true)); namespace { + class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -255,8 +265,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SHL_I128, 0); setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { + // @LOCALMOD: use standard names and calling conventions for pnacl + if (!Subtarget->isTargetNaCl() && Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -652,9 +662,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - + // @LOCALMOD-START + if (!Subtarget->useInlineJumpTables()) + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + // @LOCALMOD-END + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Use the default implementation. 
setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -667,9 +682,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Non-Darwin platforms may return values in these registers via the // personality function. setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // @LOCALMOD-START setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + // we use the first caller saved regs here + // c.f.: llvm-gcc/llvm-gcc-4.2/gcc/unwind-dw2.c::uw_install_context + // NOTE: these are related to the _Unwind_PNaClSetResult{0,1} functions + setExceptionPointerRegister(ARM::R4); + setExceptionSelectorRegister(ARM::R5); + + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + // @LOCALMOD-END } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); @@ -755,8 +779,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Custom); - + // @LOCALMOD-START + //setOperationAction(ISD::BR_JT, MVT::Other, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, + Subtarget->useInlineJumpTables() ? Custom : Expand); + // @LOCALMOD-END + // We don't support sin/cos/fmod/copysign/pow setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); @@ -793,6 +821,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TARGET_ARCH, MVT::i32, Custom); + } + // @LOCALMOD-END + // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -884,6 +920,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + // @LOCALMOD-START + case ARMISD::WrapperJT2: return "ARMISD::WrapperJT2"; + case ARMISD::EH_RETURN: return "ARMISD::EH_RETURN"; + // @LOCALMOD-END case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; @@ -1662,6 +1702,27 @@ ARMTargetLowering::HandleByVal( assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); + + // @LOCALMOD-BEGIN + // The original mechanism tries to split a byval argument between registers + // and the stack. It doesn't work correctly yet, so disable it. + // This leaves the entire byval argument on the stack, and the rest + // of the parameters will need to be on the stack as well, to have + // the correct order for var-args. We remember the fact that there was + // a byval param that forced this, so that we know not to use the + // handle var-args reg-save area. + // PR11018. 
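// Why HandleByVal now forces everything onto the stack (implemented in the lines
// that follow): va_arg walks memory assuming the named byval bytes and the
// anonymous arguments form one contiguous, in-order block. An illustrative
// C-level case with hypothetical types, not from the tree:
struct BigArg { int words[6]; };         // larger than the 4 GPRs r0-r3
int SumAfter(struct BigArg fixed, ...);  // splitting 'fixed' between registers
                                         // and stack would interleave it with
                                         // the variadic arguments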
+ if ((!State->isFirstByValRegValid()) && + (ARM::R0 <= reg) && (reg <= ARM::R3)) { + State->setHasByValInRegPosition(); + } + // Confiscate any remaining parameter registers to preclude their + // assignment to subsequent parameters. + while (State->AllocateReg(GPRArgRegs, 4)) + ; + return; + // @LOCALMOD-END + if ((!State->isFirstByValRegValid()) && (ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { @@ -2062,7 +2123,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { } unsigned ARMTargetLowering::getJumpTableEncoding() const { - return MachineJumpTableInfo::EK_Inline; + // @LOCALMOD-BEGIN + if (Subtarget->useInlineJumpTables()) { + return MachineJumpTableInfo::EK_Inline; + } else { + // TODO: Find a better way to call the super-class. + return TargetLowering::getJumpTableEncoding(); + } + // @LOCALMOD-END } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, @@ -2095,28 +2163,120 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } +// @LOCALMOD-START +// more conventional jumptable implementation +SDValue ARMTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + assert(!Subtarget->useInlineJumpTables() && + "inline jump tables not custom lowered"); + const DebugLoc dl = Op.getDebugLoc(); + EVT PTy = getPointerTy(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + return DAG.getNode(ARMISD::WrapperJT2, dl, MVT::i32, JTI); +} + +////////////////////////////////////////////////////////////////////// +// NaCl TLS setup / layout intrinsics. +// See: native_client/src/untrusted/stubs/tls_params.h +SDValue ARMTargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tls_offset (size_t tls_size) { + // return 8; + // } + return DAG.getConstant(8, Op.getValueType().getSimpleVT()); +} + +SDValue ARMTargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tdb_offset (size_t tdb_size) { + // return -tdb_size; + // } + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + +SDValue +ARMTargetLowering::LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const { + // size_t __nacl_target_arch () { + // return PnaclTargetArchitectureARM_32; + // } + return DAG.getConstant(PnaclTargetArchitectureARM_32, + Op.getValueType().getSimpleVT()); +} + +////////////////////////////////////////////////////////////////////// + +// @LOCALMOD-END + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); - unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); - SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); - Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue Chain = Argument.getValue(1); + // @LOCALMOD-BEGIN + SDValue Chain; + SDValue Argument; + + if (FlagSfiDisableCP) { + // With constant pools "disabled" (moved to rodata), this constant pool + // entry is no longer in text, and simultaneous PC relativeness + // and CP Addr relativeness is no longer expressible. + // So, instead of having: + // + // .LCPI12_0: + // .long var(tlsgd)-((.LPC12_0+8) - .) + // ... + // ldr r2, .LCPI12_0 + // .LPC12_0: + // add r0, pc, r2 + // + // we have: + // + // .LCPI12_0: + // .long var(tlsgd) + // ... + // // get addr of .LCPI12_0 into r2 + // ldr r0, [r2] + // add r0, r2, r0 + // (1) No longer subtracting pc, so no longer adding that back + // (2) Not adding "." in the CP entry, so adding it via instructions. + // + unsigned char PCAdj = 0; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, + false); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Argument.getValue(1); + Argument = DAG.getNode(ISD::ADD, dl, PtrVT, Argument, CPAddr); + } else { // sort of @LOCALMOD-END + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); + Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); // @ LOCALMOD + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Argument.getValue(1); // @LOCALMOD - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + } // @LOCALMOD-END // call __tls_get_addr. ArgListTy Args; @@ -2153,25 +2313,49 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - // Initial exec model. - unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; - ARMConstantPoolValue *CPV = + + // @LOCALMOD-BEGIN + if (FlagSfiDisableCP) { + // Similar to change to LowerToTLSGeneralDynamicModel, and + // for the same reason. + unsigned char PCAdj = 0; + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, + false); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Offset = DAG.getLoad(PtrVT, dl, Chain, CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Offset.getValue(1); + + Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Offset, CPAddr); + + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + } else { // sort of @LOCALMOD-END (indentation) + // Initial exec model. + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); - Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); - Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - Chain = Offset.getValue(1); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Offset.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + } // @LOCALMOD-END } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2323,17 +2507,55 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + + // @LOCALMOD-BEGIN + if (FlagSfiDisableCP) { + // With constant pools "disabled" (moved to rodata), the constant pool + // entry is no longer in text, and the PC relativeness is + // no longer expressible. + // + // Instead of having: + // + // .LCPI12_0: + // .long _GLOBAL_OFFSET_TABLE_-(.LPC12_0+8) + // ... + // ldr r2, .LCPI12_0 + // .LPC12_0: + // add r0, pc, r2 + // + // Things to try: + // (1) get the address of the GOT through a pc-relative MOVW / MOVT. + // + // movw r0, :lower16:_GLOBAL_OFFSET_TABLE_ - (.LPC12_0 + 8) + // movt r0, :upper16:_GLOBAL_OFFSET_TABLE_ - (.LPC12_0 + 8) + // .LPC12_0: + // add r0, pc, r0 + // + // (2) Make the constant pool entry relative to its own location + // + // .LCPI12_0: + // .long _GLOBAL_OFFSET_TABLE_-. + // ... + // // get address of LCPI12_0 into r0 (possibly 3 instructions for PIC) + // ldr r1, [r0] + // add r1, r0, r1 + // + // We will try (1) for now, since (2) takes about 3 more instructions + // (and one of them is a load). 
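+    // Hedged sketch of how (1) is carried through the rest of this patch:
+    // the WrapperGOT node returned below is matched by the MOVGOTAddr
+    // pseudo in ARMInstrInfo.td, which is expanded into the MOVi16PIC /
+    // MOVTi16PIC pair plus a PICADD, i.e. roughly:
+    //   movw r0, :lower16:_GLOBAL_OFFSET_TABLE_ - (.LPCn_0+8)
+    //   movt r0, :upper16:_GLOBAL_OFFSET_TABLE_ - (.LPCn_0+8)
+    // .LPCn_0:
+    //   add  r0, pc, r0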
+ return DAG.getNode(ARMISD::WrapperGOT, dl, MVT::i32); + } else { // Sort of LOCALMOD-END (indentation only + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } // @LOCALMOD-END } SDValue @@ -2359,6 +2581,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::nacl_read_tp: // @LOCALMOD case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2529,6 +2752,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned NumGPRs; if (CCInfo.isFirstByValRegValid()) NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); + // @LOCALMOD-BEGIN + else if (CCInfo.hasByValInRegPosition()) + NumGPRs = 0; + // @LOCALMOD-END else { unsigned int firstUnalloced; firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, @@ -2562,6 +2789,10 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, unsigned firstRegToSaveIndex; if (CCInfo.isFirstByValRegValid()) firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; + // @LOCALMOD-BEGIN + else if (CCInfo.hasByValInRegPosition()) + firstRegToSaveIndex = 4; // Nothing to save. + // @LOCALMOD-END else { firstRegToSaveIndex = CCInfo.getFirstUnallocated (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); @@ -5032,7 +5263,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { "unexpected types for extended operands to VMULL"); return DAG.getNode(NewOpc, DL, VT, Op0, Op1); } - + // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during // isel lowering to take advantage of no-stall back to back vmul + vmla. // vmull q0, d4, d6 @@ -5051,6 +5282,38 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } +// @LOCALMOD-START +// An EH_RETURN is the result of lowering llvm.eh.return.i32 which in turn is +// generated from __builtin_eh_return (offset, handler) +// The effect of this is to adjust the stack pointer by "offset" +// and then branch to "handler". 
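+// As a concrete (hedged) sketch of that chain: unwinder code such as
+//   __builtin_eh_return(offset, handler);
+// is emitted by the front end as the intrinsic call
+//   call void @llvm.eh.return.i32(i32 %offset, i8* %handler)
+// which SelectionDAG builds as ISD::EH_RETURN and which LowerEH_RETURN
+// below turns into ARMISD::EH_RETURN.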
+SDValue ARMTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + const { + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store stack offset in R2, jump target in R3, dummy return value in R0 + // The dummy return value is needed to make the use-def chains happy, + // because the EH_RETURN instruction uses the isReturn attribute, which + // means preceding code needs to define the return register (R0 on ARM). + // http://code.google.com/p/nativeclient/issues/detail?id=2643 + unsigned OffsetReg = ARM::R2; + unsigned AddrReg = ARM::R3; + unsigned ReturnReg = ARM::R0; + Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); + Chain = DAG.getCopyToReg(Chain, dl, AddrReg, Handler); + Chain = DAG.getCopyToReg(Chain, dl, ReturnReg, DAG.getIntPtrConstant(0)); + return DAG.getNode(ARMISD::EH_RETURN, dl, + MVT::Other, + Chain, + DAG.getRegister(OffsetReg, MVT::i32), + DAG.getRegister(AddrReg, getPointerTy())); +} +// @LOCALMOD-END + + static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { // Convert to float @@ -5296,7 +5559,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalAddress: + case ISD::JumpTable: return LowerJumpTable(Op, DAG); // @LOCALMOD + case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -5315,6 +5579,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + // @LOCALMOD-START + // The exact semantics of this ISD are not completely clear. + // LLVM seems to always point the fp after the push ra and the old fp, i.e. + // two register slots after the beginning of the stack frame. + // It is not clear what happens when there is no frame pointer but + // but llvm unlike gcc seems to always force one when this node is + // encountered. 
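+  // Put differently (a hedged reading of the above): with the saved lr and
+  // the old fp occupying the two 4-byte slots between where fp points and
+  // the incoming-argument area, that area sits at fp + 8, which is the
+  // 2*4 returned below.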
+ case ISD::FRAME_TO_ARGS_OFFSET: return DAG.getIntPtrConstant(2*4); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + // @LOCALMOD-END + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); @@ -5345,6 +5620,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + case ISD::NACL_TARGET_ARCH: return LowerNaClTargetArch(Op, DAG); + // @LOCALMOD-END } } @@ -6555,7 +6835,11 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) .addReg(VReg1) .addImm(LoopSize >> 16)); - } else { + } else if (FlagSfiDisableCP) { // @LOCALMOD-START + BuildMI(BB, dl, TII->get(ARM::MOVi32imm)) + .addReg(varEnd, RegState::Define) + .addImm(LoopSize); + } else { // @LOCALMOD-END MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); @@ -9482,6 +9766,16 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (Subtarget->isThumb1Only()) return false; + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + ((FlagSfiLoad && N->getOpcode() == ISD::LOAD) || + (FlagSfiStore && N->getOpcode() == ISD::STORE)); + if (restrict_addressing_modes_for_nacl) { + return false; + } + // @LOCALMOD-END + EVT VT; SDValue Ptr; bool isSEXTLoad = false; @@ -9520,7 +9814,15 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SelectionDAG &DAG) const { if (Subtarget->isThumb1Only()) return false; - + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + ((FlagSfiLoad && N->getOpcode() == ISD::LOAD) || + (FlagSfiStore && N->getOpcode() == ISD::STORE)); + if (restrict_addressing_modes_for_nacl) { + return false; + } + // @LOCALMOD-END EVT VT; SDValue Ptr; bool isSEXTLoad = false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4eb3b2cb51..3302ec69a5 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -40,10 +40,13 @@ namespace llvm { WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable - + // @LOCALMOD-START + WrapperJT2, // like WrapperJT but without the UID + WrapperGOT, // A Wrapper node for GOT addresses + EH_RETURN, // For LowerEH_RETURN + // @LOCALMOD-END // Add pseudo op to model memcpy for struct byval. COPY_STRUCT_BYVAL, - CALL, // Function call. CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. 
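A hedged sketch of what WrapperJT2 (declared above) buys relative to the stock
inline encoding, for a switch dispatching on r0 in the non-PIC case; the label
names are illustrative only:

    @ inline jump table (EK_Inline): table data lives in .text after the load
    ldr   pc, [pc, r0, lsl #2]
    .long .LBB0_1
    .long .LBB0_2

    @ non-inline table (WrapperJT2 -> MOVi32imm, table emitted outside .text)
    movw  r1, :lower16:.LJTI0_0
    movt  r1, :upper16:.LJTI0_0
    ldr   r1, [r1, r0, lsl #2]
    bx    r1        @ sandboxed by SFI_GUARD_INDIRECT_JMP under NaCl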
@@ -434,6 +437,14 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const; + // @LOCALMOD-START + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index df2e55ed5c..9f7e50cd27 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -95,6 +95,14 @@ def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; +// @LOCALMOD-START +// support non-inline jumptables +// we do not use the extre uid immediate that comes with ARMWrapperJT +// TODO(robertm): figure out what it is used for +def ARMWrapperJT2 : SDNode<"ARMISD::WrapperJT2", SDTIntUnaryOp>; +// Support for MOVW/MOVT'ing the GOT address directly into a register. +def ARMWrapperGOT : SDNode<"ARMISD::WrapperGOT", SDTPtrLeaf>; +// @LOCALMOD-END def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; @@ -272,6 +280,11 @@ def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONFor def IsLE : Predicate<"TLI.isLittleEndian()">; def IsBE : Predicate<"TLI.isBigEndian()">; +// @LOCALMOD-BEGIN +def UseConstPool : Predicate<"Subtarget->useConstPool()">; +def DontUseConstPool : Predicate<"!Subtarget->useConstPool()">; +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -808,7 +821,8 @@ def postidx_reg : Operand<i32> { // use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } def addrmode2 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode2", []> { + ComplexPattern<i32, 3, "SelectAddrMode2", [], + [SDNPWantRoot]> { // @LOCALMOD let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; let ParserMatchClass = AddrMode2AsmOperand; @@ -848,7 +862,8 @@ def am2offset_imm : Operand<i32>, // FIXME: split into imm vs. reg versions. 
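 // Hedged note on the @LOCALMOD below (and the matching one on addrmode2
 // above): SDNPWantRoot makes TableGen pass the root node to
 // SelectAddrMode2/SelectAddrMode3 in ARMISelDAGToDAG.cpp, which is
 // presumably what lets those selectors see whether they are matching a
 // load or a store and refuse reg+reg offsets under -sfi-load/-sfi-store,
 // mirroring the getPre/PostIndexedAddressParts changes above.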
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } def addrmode3 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode3", []> { + ComplexPattern<i32, 3, "SelectAddrMode3", [], + [SDNPWantRoot]> { // @LOCALMOD let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; let ParserMatchClass = AddrMode3AsmOperand; @@ -1568,6 +1583,42 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii, // Instructions //===----------------------------------------------------------------------===// +// @LOCALMOD-START + +def SFI_GUARD_LOADSTORE : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +let Defs = [CPSR] in +def SFI_GUARD_LOADSTORE_TST : +PseudoInst<(outs), (ins GPR:$a), NoItinerary, []>; + +// Like SFI_GUARD_LOADSTORE, but reserved for loads into SP. +def SFI_GUARD_SP_LOAD : +PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>; + +def SFI_GUARD_INDIRECT_CALL : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +def SFI_GUARD_INDIRECT_JMP : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +def SFI_GUARD_CALL : +PseudoInst<(outs), (ins pred:$p), NoItinerary, []>; + +// NOTE: the BX_RET instruction hardcodes lr as well +def SFI_GUARD_RETURN : +PseudoInst<(outs), (ins pred:$p), NoItinerary, []>; + +def SFI_NOP_IF_AT_BUNDLE_END : +PseudoInst<(outs), (ins), NoItinerary, []>; + +// Note: intention is that $src and $dst are the same register. +def SFI_DATA_MASK : +PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>; + +// @LOCALMOD-END + + //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // @@ -1753,7 +1804,9 @@ let isBarrier = 1, isTerminator = 1 in def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, "trap", [(trap)]>, Requires<[IsARM]> { - let Inst = 0xe7ffdefe; + // @LOCALMOD-START + let Inst = 0xe7fedef0; + // @LOCALMOD-END } // Address computation and loads and stores in PIC mode. @@ -1868,6 +1921,33 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // SP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. +// @LOCALMOD-START +// Exception handling related Node and Instructions. +// The conversion sequence is: +// ISD::EH_RETURN -> ARMISD::EH_RETURN -> +// ARMeh_return -> (stack change + indirect branch) +// +// ARMeh_return takes the place of regular return instruction +// but takes two arguments. +// R2, R3 are used for storing the offset and return address respectively. +def SDT_ARMEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +def ARMehret : SDNode<"ARMISD::EH_RETURN", SDT_ARMEHRET, + [SDNPHasChain, SDNPOptInGlue]>; + + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + Defs = [SP], + Uses = [SP] in { + def ARMeh_return : PseudoInst<(outs), + (ins GPR:$spadj, GPR:$dst), + IIC_Br, + [(ARMehret GPR:$spadj, GPR:$dst)]>, + Requires<[IsARM]>; +} +// @LOCALMOD-END + + let isCall = 1, // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version @@ -2950,6 +3030,69 @@ def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), } // Constraints +// @LOCALMOD-BEGIN +// PIC / PC-relative versions of MOVi16/MOVTi16, which have an extra +// operand representing the ID of the PICADD instruction that corrects +// for relativity. 
This is used to materialize addresses into +// a register in a PC-relative manner. +// +// E.g. Rather than have an absolute address in $imm, and transferred to +// a register with: +// movw $Rd, :lower16:$imm +// movt $Rd, :upper16:$imm +// +// we will instead have a relative offset: +// movw $Rd, :lower16:$imm - ($pic_add_id + 8) +// ... +// movt $Rd, :upper16:$imm - ($pic_add_id + 8) +// ... +// $pic_add_id: +// add $Rd, pc, $Rd +// +// One way these pseudo instructions (and the corresponding PICADD) +// come about is during expansion of the MOVi32imm pseudo instruction +// (see ARMExpandPseudo::ExpandMBB). +// These pseudo instructions become real instructions when they are +// finally lowered to MCInsts (e.g., at ARMAsmPrinter::EmitInstruction), +// and the extra pclabel ID becomes part of the appropriate operand. +// +// NOTE: aside from adding the pclabel operand, all other operands should +// be the same as the non-PIC versions to simplify conversion to the +// non-pseudo instructions. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + neverHasSideEffects = 1 in +def MOVi16PIC : PseudoInst<(outs GPR:$Rd), (ins imm0_65535_expr:$imm, + pclabel:$pic_add_id, + pred:$p), + IIC_iMOVi, + []>, + Requires<[IsARM, HasV6T2]>, UnaryDP; + +let Constraints = "$src = $Rd" in +def MOVTi16PIC : PseudoInst<(outs GPR:$Rd), (ins GPR:$src, + imm0_65535_expr:$imm, + pclabel:$pic_add_id, + pred:$p), + IIC_iMOVi, + []>, + UnaryDP, Requires<[IsARM, HasV6T2]>; +// @LOCALMOD-END + +// @LOCALMOD-BEGIN +// Pseudo-instruction that will be expanded into MOVW / MOVT (PIC versions) w/ +// GOT as the operand. +// The alternative is to create a constant pool entry with the (relative) +// GOT address and load from the constant pool. This is currently used +// when constant islands are turned off, since MOVW / MOVT will be faster. +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def MOVGOTAddr : PseudoInst<(outs GPR:$dst), (ins), + IIC_iMOVix2, // will expand to two MOVi's + []>, + Requires<[IsARM, UseMovt]>; + +def : ARMPat<(ARMWrapperGOT), (MOVGOTAddr)>; +// @LOCALMOD-END + def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, Requires<[IsARM, HasV6T2]>; @@ -3057,6 +3200,8 @@ def UBFX : I<(outs GPR:$Rd), // Arithmetic Instructions. 
// + + defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; @@ -4798,9 +4943,20 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, Requires<[IsARM, DontUseMovt]>; -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +// @LOCALMOD-START +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>, + Requires<[IsARM, DontUseMovt]>; +// @LOCALMOD-END def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, Requires<[IsARM, UseMovt]>; +// @LOCALMOD-START +def : ARMPat<(ARMWrapper tconstpool :$dst), (MOVi32imm tconstpool :$dst)>, + Requires<[IsARM, UseMovt, DontUseConstPool]>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>, + Requires<[IsARM, UseMovt, UseConstPool]>; +def : ARMPat<(ARMWrapperJT2 tjumptable :$dst), (MOVi32imm tjumptable :$dst)>, + Requires<[IsARM, UseMovt]>; +// @LOCALMOD-END def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), (LEApcrelJT tjumptable:$dst, imm:$id)>; @@ -5146,3 +5302,47 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // 'it' blocks in ARM mode just validate the predicates. The IT itself // is discarded. def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; + +// @LOCALMOD-BEGIN +//===----------------------------------------------------------------------===// +// NativeClient intrinsics +// These provide the ability to implement several low-level features without +// having to link native ASM code on the client. +// This code has to be kept in sync with include/llvm/Intrinsics.td and +// lib/Target/X86InstrNaCl.{td, cpp}. +// TODO(sehr): conditionalize this on IsNaCl64 | IsNaCl32 | IsNaClArm. + +let Uses = [R0], Defs = [R0] in { + // Saves all the callee-saves registers, sp, and lr to the JMP_BUF structure + // pointed to by r0. The JMP_BUF structure is the maximum size over all + // supported architectures. + def NACL_SETJ : AXI<(outs), (ins), + MiscFrm, NoItinerary, + // Bundle start + "sfi_nop_if_at_bundle_end; " + "sfi_data_mask r0; " + "stmia r0!, {{r4, r5, r6, r7, r8, r10, r11, sp, lr}}; " + "mov r0, #0; ", + [(set R0, (int_nacl_setjmp R0, LR))]>; +} + +let isBranch = 1, isBarrier = 1, isTerminator = 1, Uses = [R0, R1] in { + // Restores all the callee-saves registers, sp, and lr from the JMP_BUF + // structure pointed to by r0. Returns the value in r1 at entry. This + // implements the tail of longjmp, with the normalization of the return value + // (if the caller passes zero to longjmp, it should return 1) done in the + // caller. 
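+  // Hedged sketch of the implied JMP_BUF word layout (from NACL_SETJ above):
+  //   [0..6] r4, r5, r6, r7, r8, r10, r11   [7] sp   [8] lr
+  // NACL_LONGJ below reloads slot 7 into r12, copies it to sp, and masks sp
+  // with sfi_data_mask in the same bundle, so a longjmp cannot leave an
+  // unsandboxed stack pointer live.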
+ def NACL_LONGJ : AXI<(outs), (ins), MiscFrm, NoItinerary, + // Bundle start + "ldmia r0!, {{r4, r5, r6, r7, r8, r10, r11, r12, lr}}; " + "sfi_nop_if_at_bundle_end; " + "mov sp, r12; " + "sfi_data_mask sp; " + "movs r0, r1; " + "moveq r0, #1; " + "sfi_nop_if_at_bundle_end; " + "sfi_code_mask lr; " + "bx lr; ", + [(int_nacl_longjmp R0, R1)]>; +} +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 002d64a2d0..c2800acccd 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3533,12 +3533,24 @@ def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, Requires<[IsThumb2, DontUseMovt]>; +// @LOCALMOD-START +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>, + Requires<[IsThumb2, DontUseMovt]>; +// @LOCALMOD-END def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, Requires<[IsThumb2, UseMovt]>; def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (t2LEApcrelJT tjumptable:$dst, imm:$id)>; +// @LOCALMOD-START +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2MOVi32imm tconstpool :$dst)>, + Requires<[IsThumb2, UseMovt, DontUseConstPool]>; +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>, + Requires<[IsThumb2, UseMovt, UseConstPool]>; +def : T2Pat<(ARMWrapperJT2 tjumptable :$dst), (t2MOVi32imm tjumptable :$dst)>, + Requires<[IsThumb2, UseMovt]>; +// @LOCALMOD-END // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 0185289f3b..a8c8dce0cc 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -707,6 +707,7 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, /// ldmia rn, <ra, rb, rc> /// => /// ldmdb rn!, <ra, rb, rc> +/// @LOCALMOD This is especially useful for rn == sp bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, @@ -1387,7 +1388,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// mov pc, lr /// => /// ldmfd sp!, {..., pc} +// @LOCALMOD for sfi we do not want this to happen bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // @LOCALMOD-START + // For NaCl, do not load into PC directly for a return, since NaCl requires + // masking the address first. + if (STI->isTargetNaCl()) { + return false; + } + // @LOCALMOD-END + if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index e2ac9a466e..3dd0848058 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -123,3 +123,57 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, OutMI.addOperand(MCOp); } } + +// @LOCALMOD-BEGIN +// Unlike LowerARMMachineInstrToMCInst, the opcode has already been set. +// Otherwise, this is like LowerARMMachineInstrToMCInst, but with special +// handling where the "immediate" is PC Relative +// (used for MOVi16PIC / MOVTi16PIC, etc. 
-- see .td file) +void llvm::LowerARMMachineInstrToMCInstPCRel(const MachineInstr *MI, + MCInst &OutMI, + ARMAsmPrinter &AP, + unsigned ImmIndex, + unsigned PCIndex, + MCSymbol *PCLabel, + unsigned PCAdjustment) { + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (i == ImmIndex) { + MCContext &Ctx = AP.OutContext; + const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, Ctx); + if (PCAdjustment) { + const MCExpr *AdjExpr = MCConstantExpr::Create(PCAdjustment, Ctx); + PCRelExpr = MCBinaryExpr::CreateAdd(PCRelExpr, AdjExpr, Ctx); + } + + // Get the usual symbol operand, then subtract the PCRelExpr. + const MachineOperand &MOImm = MI->getOperand(ImmIndex); + MCOperand SymOp; + bool DidLower = AP.lowerOperand(MOImm, SymOp); + assert (DidLower && "Immediate-like operand should have been lowered"); + + const MCExpr *Expr = SymOp.getExpr(); + ARMMCExpr::VariantKind TargetKind = ARMMCExpr::VK_ARM_None; + /* Unwrap and rewrap the ARMMCExpr */ + if (Expr->getKind() == MCExpr::Target) { + const ARMMCExpr *TargetExpr = cast<ARMMCExpr>(Expr); + TargetKind = TargetExpr->getKind(); + Expr = TargetExpr->getSubExpr(); + } + Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, Ctx); + if (TargetKind != ARMMCExpr::VK_ARM_None) { + Expr = ARMMCExpr::Create(TargetKind, Expr, Ctx); + } + MCOperand MCOp = MCOperand::CreateExpr(Expr); + OutMI.addOperand(MCOp); + } else if (i == PCIndex) { // dummy index already handled as PCLabel + continue; + } else { + MCOperand MCOp; + if (AP.lowerOperand(MI->getOperand(i), MCOp)) { + OutMI.addOperand(MCOp); + } + } + } +} +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMNaClHeaders.cpp b/lib/Target/ARM/ARMNaClHeaders.cpp new file mode 100644 index 0000000000..a0b89ab05f --- /dev/null +++ b/lib/Target/ARM/ARMNaClHeaders.cpp @@ -0,0 +1,176 @@ +//===-- ARMNaClHeaders.cpp - Print SFI headers to an ARM .s file -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the initial header string needed +// for the Native Client target in ARM assembly. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_ostream.h" +#include "ARMNaClRewritePass.h" +#include <string> + +using namespace llvm; + +void EmitSFIHeaders(raw_ostream &O) { + O << " @ ========================================\n"; + O << "@ Branch: " << FlagSfiBranch << "\n"; + O << "@ Stack: " << FlagSfiStack << "\n"; + O << "@ Store: " << FlagSfiStore << "\n"; + O << "@ Data: " << FlagSfiData << "\n"; + + O << " @ ========================================\n"; + // NOTE: this macro does bundle alignment as follows + // if current bundle pos is X emit pX data items of value "val" + // NOTE: that pos will be one of: 0,4,8,12 + // + O << + "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n" + "\t.set pos, (. 
- XmagicX) % 16\n" + "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_illegal_if_at_bundle_begining\n" + "\tsfi_long_based_on_pos 1 0 0 0 0xe125be70\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nop_if_at_bundle_end\n" + "\tsfi_long_based_on_pos 0 0 0 1 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot3\n" + "\tsfi_long_based_on_pos 3 2 1 0 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot2\n" + "\tsfi_long_based_on_pos 2 1 0 3 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot1\n" + "\tsfi_long_based_on_pos 1 0 3 2 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << " @ ========================================\n"; + if (FlagSfiZeroMask) { + // This mode sets all mask to zero which makes them into nops + // this is useful for linking this code against non-sandboxed code + // for debugging purposes + O << + "\t.macro sfi_data_mask reg cond\n" + "\tbic\\cond \\reg, \\reg, #0\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_data_tst reg\n" + "\ttst \\reg, #0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_code_mask reg cond=\n" + "\tbic\\cond \\reg, \\reg, #0\n" + "\t.endm\n" + "\n\n"; + + } else { + O << + "\t.macro sfi_data_mask reg cond\n" + "\tbic\\cond \\reg, \\reg, #0xc0000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_data_tst reg\n" + "\ttst \\reg, #0xc0000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_code_mask reg cond=\n" + "\tbic\\cond \\reg, \\reg, #0xc000000f\n" + "\t.endm\n" + "\n\n"; + } + + O << " @ ========================================\n"; + if (FlagSfiBranch) { + O << + "\t.macro sfi_call_preamble cond=\n" + "\tsfi_nops_to_force_slot3\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_return_preamble reg cond=\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\reg \\cond\n" + "\t.endm\n" + "\n\n"; + + // This is used just before "bx rx" + O << + "\t.macro sfi_indirect_jump_preamble link cond=\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\link \\cond\n" + "\t.endm\n" + "\n\n"; + + // This is use just before "blx rx" + O << + "\t.macro sfi_indirect_call_preamble link cond=\n" + "\tsfi_nops_to_force_slot2\n" + "\tsfi_code_mask \\link \\cond\n" + "\t.endm\n" + "\n\n"; + + } + + if (FlagSfiStore) { + O << " @ ========================================\n"; + + O << + "\t.macro sfi_load_store_preamble reg cond\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_data_mask \\reg, \\cond\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_cstore_preamble reg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_data_tst \\reg\n" + "\t.endm\n" + "\n\n"; + } else { + O << + "\t.macro sfi_load_store_preamble reg cond\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_cstore_preamble reg cond\n" + "\t.endm\n" + "\n\n"; + } + + O << " @ ========================================\n"; + O << "\t.text\n"; +} diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp new file mode 100644 index 0000000000..f7f64601d7 --- /dev/null +++ b/lib/Target/ARM/ARMNaClRewritePass.cpp @@ -0,0 +1,883 @@ +//===-- ARMNaClRewritePass.cpp - Native Client Rewrite Pass ------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Native Client Rewrite Pass +// This final pass inserts the sandboxing instructions needed to run inside +// the Native Client sandbox. Native Client requires certain software fault +// isolation (SFI) constructions to be put in place, to prevent escape from +// the sandbox. Native Client refuses to execute binaries without the correct +// SFI sequences. +// +// Potentially dangerous operations which are protected include: +// * Stores +// * Branches +// * Changes to SP +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-sfi" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMNaClRewritePass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" +#include <set> +#include <stdio.h> + +using namespace llvm; + +namespace llvm { + +cl::opt<bool> +FlagSfiData("sfi-data", cl::desc("use illegal at data bundle beginning")); + +cl::opt<bool> +FlagSfiLoad("sfi-load", cl::desc("enable sandboxing for load")); + +cl::opt<bool> +FlagSfiStore("sfi-store", cl::desc("enable sandboxing for stores")); + +cl::opt<bool> +FlagSfiStack("sfi-stack", cl::desc("enable sandboxing for stack changes")); + +cl::opt<bool> +FlagSfiBranch("sfi-branch", cl::desc("enable sandboxing for branches")); + +cl::opt<bool> +FlagNaClUseM23ArmAbi("nacl-use-m23-arm-abi", + cl::desc("use the Chrome M23 ARM ABI")); + +} + +namespace { + class ARMNaClRewritePass : public MachineFunctionPass { + public: + static char ID; + ARMNaClRewritePass() : MachineFunctionPass(ID) {} + + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM Native Client Rewrite Pass"; + } + + private: + + bool SandboxMemoryReferencesInBlock(MachineBasicBlock &MBB); + void SandboxMemory(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx, + bool CPSRLive, + bool IsLoad); + bool TryPredicating(MachineInstr &MI, ARMCC::CondCodes); + + bool SandboxBranchesInBlock(MachineBasicBlock &MBB); + bool SandboxStackChangesInBlock(MachineBasicBlock &MBB); + + void SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + void LightweightVerify(MachineFunction &MF); + }; + char ARMNaClRewritePass::ID = 0; +} + +static bool IsReturn(const MachineInstr &MI) { + return (MI.getOpcode() == ARM::BX_RET); +} + +static bool IsIndirectJump(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: return false; + case ARM::BX: + case ARM::TAILJMPr: + return true; + } +} + +static bool IsIndirectCall(const MachineInstr &MI) { + return MI.getOpcode() == ARM::BLX; +} + +static bool IsDirectCall(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: return false; + case ARM::BL: + case ARM::BL_pred: + case ARM::TPsoft: + return true; + } +} + +static bool IsCPSRLiveOut(const MachineBasicBlock &MBB) { + // CPSR is live-out if any successor lists it as live-in. 
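+  // (Hedged context: this matters because the faster TST-based sandbox in
+  // SandboxMemory predicates the guarded access on EQ after an sfi_data_tst,
+  // which writes CPSR, so that form is only usable where CPSR is dead.)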
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + E = MBB.succ_end(); + SI != E; + ++SI) { + const MachineBasicBlock *Succ = *SI; + if (Succ->isLiveIn(ARM::CPSR)) return true; + } + return false; +} + +static void DumpInstructionVerbose(const MachineInstr &MI) { + dbgs() << MI; + dbgs() << MI.getNumOperands() << " operands:" << "\n"; + for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + const MachineOperand& op = MI.getOperand(i); + dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n"; + } + dbgs() << "\n"; +} + +static void DumpBasicBlockVerbose(const MachineBasicBlock &MBB) { + dbgs() << "\n<<<<< DUMP BASIC BLOCK START\n"; + for (MachineBasicBlock::const_iterator MBBI = MBB.begin(), MBBE = MBB.end(); + MBBI != MBBE; + ++MBBI) { + DumpInstructionVerbose(*MBBI); + } + dbgs() << "<<<<< DUMP BASIC BLOCK END\n\n"; +} + +/**********************************************************************/ +/* Exported functions */ + +namespace ARM_SFI { + +bool IsStackChange(const MachineInstr &MI, const TargetRegisterInfo *TRI) { + return MI.modifiesRegister(ARM::SP, TRI); +} + +bool NextInstrMasksSP(const MachineInstr &MI) { + MachineBasicBlock::const_iterator It = &MI; + const MachineBasicBlock *MBB = MI.getParent(); + + MachineBasicBlock::const_iterator next = ++It; + if (next == MBB->end()) { + return false; + } + + const MachineInstr &next_instr = *next; + unsigned opcode = next_instr.getOpcode(); + return (opcode == ARM::SFI_DATA_MASK) && + (next_instr.getOperand(0).getReg() == ARM::SP); +} + +bool IsSandboxedStackChange(const MachineInstr &MI) { + // Calls do not change the stack on ARM but they have implicit-defs, so + // make sure they do not get sandboxed. + if (MI.getDesc().isCall()) + return true; + + unsigned opcode = MI.getOpcode(); + switch (opcode) { + default: break; + + // Our mask instructions correctly update the stack pointer. + case ARM::SFI_DATA_MASK: + return true; + + // These just bump SP by a little (and access the stack), + // so that is okay due to guard pages. + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + return true; + + // Similar, unless it is a load into SP... + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: { + bool dest_SP = false; + // Dest regs start at operand index 4. + for (unsigned i = 4; i < MI.getNumOperands(); ++i) { + const MachineOperand &DestReg = MI.getOperand(i); + dest_SP = dest_SP || (DestReg.getReg() == ARM::SP); + } + if (dest_SP) { + break; + } + return true; + } + + // Some localmods *should* prevent selecting a reg offset + // (see SelectAddrMode2 in ARMISelDAGToDAG.cpp). + // Otherwise, the store is already a potential violation. + case ARM::STR_PRE_REG: + case ARM::STR_PRE_IMM: + + case ARM::STRH_PRE: + + case ARM::STRB_PRE_REG: + case ARM::STRB_PRE_IMM: + return true; + + // Similar, unless it is a load into SP... 
+ case ARM::LDRi12: + case ARM::LDR_PRE_REG: + case ARM::LDR_PRE_IMM: + case ARM::LDRH_PRE: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_PRE_IMM: + case ARM::LDRSH_PRE: + case ARM::LDRSB_PRE: { + const MachineOperand &DestReg = MI.getOperand(0); + if (DestReg.getReg() == ARM::SP) { + break; + } + return true; + } + + // Here, if SP is the base / write-back reg, we need to check if + // a reg is used as offset (otherwise it is not a small nudge). + case ARM::STR_POST_REG: + case ARM::STR_POST_IMM: + case ARM::STRH_POST: + case ARM::STRB_POST_REG: + case ARM::STRB_POST_IMM: { + const MachineOperand &WBReg = MI.getOperand(0); + const MachineOperand &OffReg = MI.getOperand(3); + if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) { + break; + } + return true; + } + + // Similar, but also check that DestReg is not SP. + case ARM::LDR_POST_REG: + case ARM::LDR_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRH_POST: + case ARM::LDRSH_POST: + case ARM::LDRSB_POST: { + const MachineOperand &DestReg = MI.getOperand(0); + if (DestReg.getReg() == ARM::SP) { + break; + } + const MachineOperand &WBReg = MI.getOperand(1); + const MachineOperand &OffReg = MI.getOperand(3); + if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) { + break; + } + return true; + } + } + + return (NextInstrMasksSP(MI)); +} + +bool NeedSandboxStackChange(const MachineInstr &MI, + const TargetRegisterInfo *TRI) { + return (IsStackChange(MI, TRI) && !IsSandboxedStackChange(MI)); +} + +} // namespace ARM_SFI + +/**********************************************************************/ + +void ARMNaClRewritePass::getAnalysisUsage(AnalysisUsage &AU) const { + // Slight (possibly unnecessary) efficiency tweak: + // Promise not to modify the CFG. + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/* + * A primitive validator to catch problems at compile time. + * E.g., it could be used along with bugpoint to reduce a bitcode file. + */ +void ARMNaClRewritePass::LightweightVerify(MachineFunction &MF) { + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); + MFI != MFE; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); + MBBI != MBBE; + ++MBBI) { + MachineInstr &MI = *MBBI; + + if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) { + dbgs() << "LightWeightVerify for function: " + << MF.getFunction()->getName() << " (BAD STACK CHANGE)\n"; + DumpInstructionVerbose(MI); + DumpBasicBlockVerbose(MBB); + // assert(false && "LightweightVerify Failed"); + } + } + } +} + +void ARMNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + // (1) Ensure there is room in the bundle for a data mask instruction + // (nop'ing to the next bundle if needed). + // (2) Do a data mask on SP after the instruction that updated SP. + MachineInstr &MI = *MBBI; + + // Use same predicate as current instruction. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_NOP_IF_AT_BUNDLE_END)); + + // Get to next instr. 
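+  // Net effect, as a hedged sketch for an update such as "add sp, sp, r1"
+  // once the pseudos are expanded to the sfi_* macros:
+  //   sfi_nop_if_at_bundle_end
+  //   add sp, sp, r1
+  //   bic sp, sp, #0xc0000000     @ SFI_DATA_MASK, i.e. sfi_data_mask sp
+  // so the update and its mask always share one 16-byte bundle.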
+ MachineBasicBlock::iterator MBBINext = (++MBBI); + + BuildMI(MBB, MBBINext, MI.getDebugLoc(), + TII->get(ARM::SFI_DATA_MASK)) + .addReg(ARM::SP, RegState::Define) // modify SP (as dst) + .addReg(ARM::SP, RegState::Kill) // start with SP (as src) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) +} + +bool ARMNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) { + SandboxStackChange(MBB, MBBI); + Modified |= true; + } + } + return Modified; +} + +bool ARMNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + // Use same predicate as current instruction. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + + if (IsReturn(MI)) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_RETURN)) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + + if (IsIndirectJump(MI)) { + unsigned Addr = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_INDIRECT_JMP)) + .addReg(Addr, RegState::Define) // Destination definition (as dst) + .addReg(Addr, RegState::Kill) // Destination read (as src) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + + if (IsDirectCall(MI)) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_CALL)) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + + if (IsIndirectCall(MI)) { + unsigned Addr = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_INDIRECT_CALL)) + .addReg(Addr, RegState::Define) // Destination definition (as dst) + .addReg(Addr, RegState::Kill) // Destination read (as src) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + } + + return Modified; +} + +bool ARMNaClRewritePass::TryPredicating(MachineInstr &MI, ARMCC::CondCodes Pred) { + // Can't predicate if it's already predicated. + // TODO(cbiffle): actually we can, if the conditions match. + if (TII->isPredicated(&MI)) return false; + + /* + * ARM predicate operands use two actual MachineOperands: an immediate + * holding the predicate condition, and a register referencing the flags. + */ + SmallVector<MachineOperand, 2> PredOperands; + PredOperands.push_back(MachineOperand::CreateImm((int64_t) Pred)); + PredOperands.push_back(MachineOperand::CreateReg(ARM::CPSR, false)); + + // This attempts to rewrite, but some instructions can't be predicated. + return TII->PredicateInstruction(&MI, PredOperands); +} + +static bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 0... + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + + case ARM::VLDMDIA: + case ARM::VLDMSIA: + *AddrIdx = 0; + break; + // Instructions with base address register in position 1... 
+ case ARM::LDMIA_UPD: // same reg at position 0 and position 1 + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + + case ARM::LDRSB: + case ARM::LDRH: + case ARM::LDRSH: + + case ARM::LDRi12: + case ARM::LDRrs: + case ARM::LDRBi12: + case ARM::LDRBrs: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VLDRS: + case ARM::VLDRD: + + case ARM::LDREX: + case ARM::LDREXB: + case ARM::LDREXH: + *AddrIdx = 1; + break; + + // Instructions with base address register in position 2... + case ARM::LDR_PRE_REG: + case ARM::LDR_PRE_IMM: + case ARM::LDR_POST_REG: + case ARM::LDR_POST_IMM: + + case ARM::LDRB_PRE_REG: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + + case ARM::LDRD: + *AddrIdx = 2; + break; + } + + if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) { + // The contents of SP do not require masking. + return false; + } + + return true; +} + +/* + * Sandboxes a memory reference instruction by inserting an appropriate mask + * or check operation before it. + */ +void ARMNaClRewritePass::SandboxMemory(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx, + bool CPSRLive, + bool IsLoad) { + unsigned Addr = MI.getOperand(AddrIdx).getReg(); + + if (!FlagNaClUseM23ArmAbi && Addr == ARM::R9) { + // R9-relative loads are no longer sandboxed. + assert(IsLoad && "There should be no r9-relative stores"); + } else if (!CPSRLive && TryPredicating(MI, ARMCC::EQ)) { + /* + * For unconditional memory references where CPSR is not in use, we can use + * a faster sandboxing sequence by predicating the load/store -- assuming we + * *can* predicate the load/store. + */ + + // TODO(sehr): add SFI_GUARD_SP_LOAD_TST. + // Instruction can be predicated -- use the new sandbox. + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_LOADSTORE_TST)) + .addReg(Addr); // Address read (as src) + } else { + unsigned Opcode; + if (IsLoad && (MI.getOperand(0).getReg() == ARM::SP)) { + Opcode = ARM::SFI_GUARD_SP_LOAD; + } else { + Opcode = ARM::SFI_GUARD_LOADSTORE; + } + // Use same predicate as current instruction. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + // Use the older BIC sandbox, which is universal, but incurs a stall. + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode)) + .addReg(Addr, RegState::Define) // Address definition (as dst). + .addReg(Addr, RegState::Kill) // Address read (as src). + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + + /* + * This pseudo-instruction is intended to generate something resembling the + * following, but with alignment enforced. + * TODO(cbiffle): move alignment into this function, use the code below. + * + * // bic<cc> Addr, Addr, #0xC0000000 + * BuildMI(MBB, MBBI, MI.getDebugLoc(), + * TII->get(ARM::BICri)) + * .addReg(Addr) // rD + * .addReg(Addr) // rN + * .addImm(0xC0000000) // imm + * .addImm((int64_t) Pred) // predicate condition + * .addReg(PredReg) // predicate source register (CPSR) + * .addReg(0); // flag output register (0 == no flags) + */ + } +} + +static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 0... 
+ case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + + case ARM::VSTMDIA: + case ARM::VSTMSIA: + *AddrIdx = 0; + break; + + // Instructions with base address register in position 1... + case ARM::STMIA_UPD: // same reg at position 0 and position 1 + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + + case ARM::STRH: + case ARM::STRi12: + case ARM::STRrs: + case ARM::STRBi12: + case ARM::STRBrs: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::VSTRS: + case ARM::VSTRD: + *AddrIdx = 1; + break; + + // + // NEON stores + // + + // VST1 + case ARM::VST1d8: + case ARM::VST1d16: + case ARM::VST1d32: + case ARM::VST1d64: + case ARM::VST1q8: + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + case ARM::VST1d8wb_fixed: + case ARM::VST1d16wb_fixed: + case ARM::VST1d32wb_fixed: + case ARM::VST1d64wb_fixed: + case ARM::VST1q8wb_fixed: + case ARM::VST1q16wb_fixed: + case ARM::VST1q32wb_fixed: + case ARM::VST1q64wb_fixed: + case ARM::VST1d8wb_register: + case ARM::VST1d16wb_register: + case ARM::VST1d32wb_register: + case ARM::VST1d64wb_register: + case ARM::VST1q8wb_register: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_register: + + // VST1LN + case ARM::VST1LNd8: + case ARM::VST1LNd16: + case ARM::VST1LNd32: + case ARM::VST1LNd8_UPD: + case ARM::VST1LNd16_UPD: + case ARM::VST1LNd32_UPD: + + // VST2 + case ARM::VST2d8: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_register: + case ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + + // VST2LN + case ARM::VST2LNd8: + case ARM::VST2LNd16: + case ARM::VST2LNq16: + case ARM::VST2LNd32: + case ARM::VST2LNq32: + case ARM::VST2LNd8_UPD: + case ARM::VST2LNd16_UPD: + case ARM::VST2LNq16_UPD: + case ARM::VST2LNd32_UPD: + case ARM::VST2LNq32_UPD: + + // VST3 + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3q8: + case ARM::VST3q16: + case ARM::VST3q32: + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + + // VST3LN + case ARM::VST3LNd8: + case ARM::VST3LNd16: + case ARM::VST3LNq16: + case ARM::VST3LNd32: + case ARM::VST3LNq32: + case ARM::VST3LNd8_UPD: + case ARM::VST3LNd16_UPD: + case ARM::VST3LNq16_UPD: + case ARM::VST3LNd32_UPD: + case ARM::VST3LNq32_UPD: + + // VST4 + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + + // VST4LN + case ARM::VST4LNd8: + case ARM::VST4LNd16: + case ARM::VST4LNq16: + case ARM::VST4LNd32: + case ARM::VST4LNq32: + case ARM::VST4LNd8_UPD: + case ARM::VST4LNd16_UPD: + case ARM::VST4LNq16_UPD: + case ARM::VST4LNd32_UPD: + case ARM::VST4LNq32_UPD: + + *AddrIdx = 0; + break; + + // Instructions with base address register in position 2... 
+ case ARM::STR_PRE_REG: + case ARM::STR_PRE_IMM: + case ARM::STR_POST_REG: + case ARM::STR_POST_IMM: + + case ARM::STRB_PRE_REG: + case ARM::STRB_PRE_IMM: + case ARM::STRB_POST_REG: + case ARM::STRB_POST_IMM: + + case ARM::STRH_PRE: + case ARM::STRH_POST: + + + case ARM::STRD: + case ARM::STREX: + case ARM::STREXB: + case ARM::STREXH: + *AddrIdx = 2; + break; + } + + if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) { + // The contents of SP do not require masking. + return false; + } + + return true; +} + +bool ARMNaClRewritePass::SandboxMemoryReferencesInBlock( + MachineBasicBlock &MBB) { + /* + * This is a simple local reverse-dataflow analysis to determine where CPSR + * is live. We cannot use the conditional store sequence anywhere that CPSR + * is live, or we'd affect correctness. The existing liveness analysis passes + * barf when applied pre-emit, after allocation, so we must do it ourselves. + */ + + // LOCALMOD(pdox): Short-circuit this function. Assume CPSR is always live, + // until we figure out why the assert is tripping. + bool Modified2 = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) { + bool CPSRLive = true; + SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, true); + Modified2 = true; + } + if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) { + bool CPSRLive = true; + SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, false); + Modified2 = true; + } + } + return Modified2; + // END LOCALMOD(pdox) + + bool CPSRLive = IsCPSRLiveOut(MBB); + + // Given that, record which instructions should not be altered to trash CPSR: + std::set<const MachineInstr *> InstrsWhereCPSRLives; + for (MachineBasicBlock::const_reverse_iterator MBBI = MBB.rbegin(), + E = MBB.rend(); + MBBI != E; + ++MBBI) { + const MachineInstr &MI = *MBBI; + // Check for kills first. + if (MI.modifiesRegister(ARM::CPSR, TRI)) CPSRLive = false; + // Then check for uses. + if (MI.readsRegister(ARM::CPSR)) CPSRLive = true; + + if (CPSRLive) InstrsWhereCPSRLives.insert(&MI); + } + + // Sanity check: + assert(CPSRLive == MBB.isLiveIn(ARM::CPSR) + && "CPSR Liveness analysis does not match cached live-in result."); + + // Now: find and sandbox stores. 
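+  // Hedged sketch of the two forms SandboxMemory can emit for "str r1, [r2]"
+  // (bundle-alignment nops omitted):
+  //   CPSR dead, predicable:  tst   r2, #0xc0000000
+  //                           streq r1, [r2]
+  //   otherwise:              bic   r2, r2, #0xc0000000
+  //                           str   r1, [r2]
+  // Note that despite the comment above, the loop below guards dangerous
+  // loads as well as stores, under -sfi-load and -sfi-store respectively.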
+ bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) { + bool CPSRLive = + (InstrsWhereCPSRLives.find(&MI) != InstrsWhereCPSRLives.end()); + SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, true); + Modified = true; + } + if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) { + bool CPSRLive = + (InstrsWhereCPSRLives.find(&MI) != InstrsWhereCPSRLives.end()); + SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, false); + Modified = true; + } + } + + return Modified; +} + +/**********************************************************************/ + +bool ARMNaClRewritePass::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + TRI = MF.getTarget().getRegisterInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + + if (MBB.hasAddressTaken()) { + //FIXME: use symbolic constant or get this value from some configuration + MBB.setAlignment(4); + Modified = true; + } + + if (FlagSfiLoad || FlagSfiStore) + Modified |= SandboxMemoryReferencesInBlock(MBB); + if (FlagSfiBranch) Modified |= SandboxBranchesInBlock(MBB); + if (FlagSfiStack) Modified |= SandboxStackChangesInBlock(MBB); + } + DEBUG(LightweightVerify(MF)); + return Modified; +} + +/// createARMNaClRewritePass - returns an instance of the NaClRewritePass. +FunctionPass *llvm::createARMNaClRewritePass() { + return new ARMNaClRewritePass(); +} diff --git a/lib/Target/ARM/ARMNaClRewritePass.h b/lib/Target/ARM/ARMNaClRewritePass.h new file mode 100644 index 0000000000..c8854a54fc --- /dev/null +++ b/lib/Target/ARM/ARMNaClRewritePass.h @@ -0,0 +1,36 @@ +//===-- ARMNaClRewritePass.h - NaCl Sandboxing Pass ------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_ARMNACLREWRITEPASS_H +#define TARGET_ARMNACLREWRITEPASS_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + extern cl::opt<bool> FlagSfiZeroMask; + extern cl::opt<bool> FlagSfiData; + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiStack; + extern cl::opt<bool> FlagSfiBranch; +} + +namespace ARM_SFI { + +bool IsStackChange(const llvm::MachineInstr &MI, + const llvm::TargetRegisterInfo *TRI); +bool IsSandboxedStackChange(const llvm::MachineInstr &MI); +bool NeedSandboxStackChange(const llvm::MachineInstr &MI, + const llvm::TargetRegisterInfo *TRI); + +} // namespace ARM_SFI + +#endif diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index b33b3c915a..4c44f69f4d 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -146,7 +146,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { // Use default for non AAPCS (or Darwin) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) + if (Subtarget->isTargetNaCl() || !Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) // @LOCALMOD return SDValue(); const ARMTargetLowering &TLI = diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index bcc9db4ae3..fc67d418ea 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -24,13 +24,22 @@ using namespace llvm; -static cl::opt<bool> +cl::opt<bool> // @LOCALMOD ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); static cl::opt<bool> DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); +// @LOCALMOD-START +// TODO: * JITing has not been tested at all +// * Thumb mode operation is also not clear: it seems jump tables +// for thumb are broken independent of this option +static cl::opt<bool> +NoInlineJumpTables("no-inline-jumptables", + cl::desc("Do not place jump tables inline in the code")); +// @LOCALMOD-END + static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); @@ -64,6 +73,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) + , UseInlineJumpTables(!NoInlineJumpTables) // @LOCALMOD , UseMovt(false) , SupportsTailCall(false) , HasFP16(false) @@ -126,6 +136,18 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } + // @LOCALMOD-BEGIN + // Advanced SIMD and Q registers are part of the NaCl ARM ABI. The ARM + // EABI specifies only an 8 byte alignment, which can result in poor + // performance for these 16 byte data types if they straddle cache lines, etc. + // Therefore, NaCl aligns stack frames 0mod16. + if (isTargetNaCl()) + stackAlignment = 16; + // NaCl uses MovT to avoid generating constant islands. 
+ if (isTargetNaCl() && !useConstPool()) + UseMovt = true; + // @LOCALMOD-END + if (!isThumb() || hasThumb2()) PostRAScheduler = true; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8e6b650602..e99d1d4a48 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -23,6 +23,15 @@ #define GET_SUBTARGETINFO_HEADER #include "ARMGenSubtargetInfo.inc" +// @LOCALMOD-BEGIN +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiDisableCP; + extern cl::opt<bool> FlagNaClUseM23ArmAbi; +} +// @LOCALMOD-END + + namespace llvm { class GlobalValue; class StringRef; @@ -91,6 +100,11 @@ protected: /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; + // @LOCALMOD-START + /// UseInlineJumpTables - True if jump tables should be in-line in the code. + bool UseInlineJumpTables; + // @LOCALMOD-END + /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit /// imms (including global addresses). bool UseMovt; @@ -262,6 +276,9 @@ protected: bool useMovt() const { return UseMovt && hasV6T2Ops(); } bool supportsTailCall() const { return SupportsTailCall; } + // @LOCALMOD + bool useConstPool() const { return !FlagSfiDisableCP; } + bool allowsUnalignedMem() const { return AllowsUnalignedMem; } const std::string & getCPUString() const { return CPUString; } @@ -285,6 +302,8 @@ protected: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + + bool useInlineJumpTables() const {return UseInlineJumpTables;} // @LOCALMOD }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b486d4fe2e..fed2d99e65 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,13 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +// @LOCALMOD-START +namespace llvm { +cl::opt<bool> FlagSfiDisableCP("sfi-disable-cp", + cl::desc("disable arm constant island pools")); +} +// @LOCALMOD-END + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -194,8 +201,24 @@ bool ARMPassConfig::addPreEmitPass() { addPass(&UnpackMachineBundlesID); } + // @LOCALMOD-START + // Note with FlagSfiDisableCP we effectively disable the + // ARMConstantIslandPass and rely on movt/movw to eliminate the need + // for constant islands + if (FlagSfiDisableCP) { + assert(getARMSubtarget().useMovt()); + } + // @LOCALMOD-END + addPass(createARMConstantIslandPass()); + // @LOCALMOD-START + // This pass does all the heavy sfi lifting. 
+ if (getARMSubtarget().isTargetNaCl()) { + addPass(createARMNaClRewritePass()); + } + // @LOCALMOD-END + return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index ebdd5b4d64..cd6921e1ae 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -29,6 +29,13 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" +// @LOCALMOD-START +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 3d85ca7d69..22db332f2b 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -29,7 +29,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, InitializeELF(isAAPCS_ABI); if (isAAPCS_ABI) { - LSDASection = NULL; + //LSDASection = NULL; } AttributesSection = diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 377bd9243c..1ea4e00867 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,8 @@ add_llvm_target(ARMCodeGen ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMNaClHeaders.cpp + ARMNaClRewritePass.cpp ARMRegisterInfo.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index dcc41d93f5..beeabb6d42 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -223,6 +223,71 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } + // @LOCALMOD-BEGIN + // TODO(pdox): Kill this code once we switch to MC object emission + const char *SFIInst = NULL; + unsigned SFIEmitDest = ~0; + unsigned SFIEmitPred = ~0; + switch (Opcode) { + case ARM::SFI_NOP_IF_AT_BUNDLE_END : + SFIInst = "sfi_nop_if_at_bundle_end"; + SFIEmitDest = ~0; + SFIEmitPred = ~0; + break; + case ARM::SFI_GUARD_LOADSTORE : + SFIInst = "sfi_load_store_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_INDIRECT_CALL: + SFIInst = "sfi_indirect_call_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_INDIRECT_JMP : + SFIInst = "sfi_indirect_jump_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_DATA_MASK : + SFIInst = "sfi_data_mask"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_LOADSTORE_TST: + SFIInst = "sfi_cload_store_preamble"; + SFIEmitDest = 0; + SFIEmitPred = ~0; + break; + case ARM::SFI_GUARD_CALL : + SFIInst = "sfi_call_preamble"; + SFIEmitDest = ~0; + SFIEmitPred = 0; + break; + case ARM::SFI_GUARD_RETURN : + SFIInst = "sfi_return_preamble lr,"; + SFIEmitDest = ~0; + SFIEmitPred = 0; + break; + } + if (SFIInst) { + O << '\t' << SFIInst; + if (SFIEmitDest != (unsigned)~0) { + O << ' '; + printOperand(MI, SFIEmitDest, O); + } + if (SFIEmitDest != (unsigned)~0 && SFIEmitPred != (unsigned)~0) { + O << ','; + } + if (SFIEmitPred != (unsigned)~0) { + O << ' '; + printPredicateOperand(MI, SFIEmitPred, O); + } + O << '\n'; + return; + } + // @LOCALMOD-END + if (Opcode == ARM::tLDMIA) { bool Writeback = true; unsigned BaseReg = MI->getOperand(0).getReg(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 1ba6ab039f..8abf449206 100644 --- 
a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -232,8 +233,16 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; + // @LOCALMOD-BEGIN-UPSTREAM + // FIXME: e1a00000 vs e320f000 + // e1a00000 is mov r0, r0 which may result in a stall + // but the real nop instruction is not available on early hw.... + // Perhaps this really needs to be switched on the Subtarget?? + // GNU as likes to emit e320f000... for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(nopEncoding); + OW->Write32(0xe320f000); // regular NOP + // @LOCALMOD-END + // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? switch (Count % 4) { @@ -559,13 +568,31 @@ namespace { class ELFARMAsmBackend : public ARMAsmBackend { public: uint8_t OSABI; + Triple::OSType OSType; // @LOCALMOD: kept OSTYPE vs upstream. FIXME: remove. ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI) - : ARMAsmBackend(T, TT), OSABI(_OSABI) { } + uint8_t _OSABI, + Triple::OSType _OSType) + : ARMAsmBackend(T, TT), OSABI(_OSABI), OSType(_OSType) { } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; + // @LOCALMOD-BEGIN + // FIXME! NaCl should INHERIT from ELFARMAsmBackend, not + // add to it. + unsigned getBundleSize() const { + return (OSType == Triple::NativeClient) ? 16 : 0; + } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + if (OSType == Triple::NativeClient) { + return CustomExpandInstNaClARM(Inst, Out); + } + return false; + } + + // @LOCALMOD-END + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMELFObjectWriter(OS, OSABI); } @@ -705,5 +732,5 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef assert(0 && "Windows not supported on ARM"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI); + return new ELFARMAsmBackend(T, TT, OSABI, TheTriple.getOS()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 99e4f713f6..253d1fa2ab 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -71,10 +71,11 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, const MCFixup &Fixup, bool IsPCRel) const { const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + const MCSymbol &ASymbol = Symbol.AliasedSymbol(); bool EmitThisSym = false; const MCSectionELF &Section = - static_cast<const MCSectionELF&>(Symbol.getSection()); + static_cast<const MCSectionELF&>(ASymbol.getSection()); bool InNormalSection = true; unsigned RelocType = 0; RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); @@ -137,9 +138,9 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, } if (EmitThisSym) - return &Symbol; + return &ASymbol; if (! 
Symbol.isTemporary() && InNormalSection) { - return &Symbol; + return &ASymbol; } return NULL; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index f0b289c6f3..059ee99f1c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -28,6 +28,12 @@ namespace llvm { virtual void anchor(); public: explicit ARMELFMCAsmInfo(); + // @LOCALMOD-BEGIN + // Exceptions handling + void setExceptionsType(ExceptionHandling::ExceptionsType ExType) { + ExceptionsType = ExType; + } + // @LOCALMOD-END }; } // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp new file mode 100644 index 0000000000..98ee80c358 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp @@ -0,0 +1,330 @@ +//=== ARMMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-mc-nacl" + +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace llvm { + cl::opt<bool> FlagSfiZeroMask("sfi-zero-mask"); +} + +/// Two helper functions for emitting the actual guard instructions + +static void EmitBICMask(MCStreamer &Out, + unsigned Addr, int64_t Pred, unsigned Mask) { + // bic\Pred \Addr, \Addr, #Mask + MCInst BICInst; + BICInst.setOpcode(ARM::BICri); + BICInst.addOperand(MCOperand::CreateReg(Addr)); // rD + BICInst.addOperand(MCOperand::CreateReg(Addr)); // rS + if (FlagSfiZeroMask) { + BICInst.addOperand(MCOperand::CreateImm(0)); // imm + } else { + BICInst.addOperand(MCOperand::CreateImm(Mask)); // imm + } + BICInst.addOperand(MCOperand::CreateImm(Pred)); // predicate + BICInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); // CPSR + BICInst.addOperand(MCOperand::CreateReg(0)); // flag out + Out.EmitInstruction(BICInst); +} + +static void EmitTST(MCStreamer &Out, unsigned Reg) { + // tst \reg, #\MASK typically 0xc0000000 + const unsigned Mask = 0xC0000000; + MCInst TSTInst; + TSTInst.setOpcode(ARM::TSTri); + TSTInst.addOperand(MCOperand::CreateReg(Reg)); // rS + if (FlagSfiZeroMask) { + TSTInst.addOperand(MCOperand::CreateImm(0)); // imm + } else { + TSTInst.addOperand(MCOperand::CreateImm(Mask)); // imm + } + TSTInst.addOperand(MCOperand::CreateImm((int64_t)ARMCC::AL)); // Always + TSTInst.addOperand(MCOperand::CreateImm(0)); // flag out + Out.EmitInstruction(TSTInst); +} + + +// This is ONLY used for sandboxing stack changes. +// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that +// it must ensure that the two instructions are in the same bundle. 
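For orientation, a sketch of what these mask constants accomplish, assuming the usual ARM NaCl layout of a 1 GiB sandbox with 16-byte bundles: the data mask clears only the top two address bits, while the code mask used by the call, jump and return guards below also clears the low four bits so that indirect targets stay bundle aligned.

    #include <cstdint>

    // Data guard: bic addr, addr, #0xC0000000 keeps the address below 1 GiB.
    uint32_t MaskDataAddr(uint32_t Addr) { return Addr & ~0xC0000000u; }

    // Code guard: bic target, target, #0xC000000F additionally forces the
    // target onto a 16-byte bundle boundary.
    uint32_t MaskCodeAddr(uint32_t Addr) { return Addr & ~0xC000000Fu; }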
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always +// emitted in conjunction with a SFI_DATA_MASK +// +static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) { + assert(I == 3 && + (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) && + (ARM::SFI_DATA_MASK == Saved[2].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + + unsigned Addr = Saved[2].getOperand(0).getReg(); + int64_t Pred = Saved[2].getOperand(2).getImm(); + assert((ARM::SP == Addr) && "Unexpected register at stack guard"); + + Out.EmitBundleLock(); + Out.EmitInstruction(Saved[1]); + EmitBICMask(Out, Addr, Pred, 0xC0000000); + Out.EmitBundleUnlock(); +} + +static void EmitDirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_call_preamble cond= + // sfi_nops_to_force_slot3 + assert(I == 2 && (ARM::SFI_GUARD_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + Out.EmitBundleAlignEnd(); + Out.EmitBundleLock(); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_indirect_call_preamble link cond= + // sfi_nops_to_force_slot2 + // sfi_code_mask \link \cond + assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + int64_t Pred = Saved[0].getOperand(2).getImm(); + Out.EmitBundleAlignEnd(); + Out.EmitBundleLock(); + EmitBICMask(Out, Reg, Pred, 0xC000000F); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_indirect_jump_preamble link cond= + // sfi_nop_if_at_bundle_end + // sfi_code_mask \link \cond + assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + int64_t Pred = Saved[0].getOperand(2).getImm(); + + Out.EmitBundleLock(); + EmitBICMask(Out, Reg, Pred, 0xC000000F); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_return_preamble reg cond= + // sfi_nop_if_at_bundle_end + // sfi_code_mask \reg \cond + assert(I == 2 && (ARM::SFI_GUARD_RETURN == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); + int64_t Pred = Saved[0].getOperand(0).getImm(); + + Out.EmitBundleLock(); + EmitBICMask(Out, ARM::LR, Pred, 0xC000000F); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_store_preamble reg cond ----> + // sfi_nop_if_at_bundle_end + // sfi_data_mask \reg, \cond + assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + int64_t Pred = Saved[0].getOperand(2).getImm(); + + Out.EmitBundleLock(); + EmitBICMask(Out, Reg, Pred, 0xC0000000); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardLoadOrStoreTst(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_cstore_preamble reg --> + // sfi_nop_if_at_bundle_end + // sfi_data_tst \reg + assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE_TST == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + + Out.EmitBundleLock(); + 
EmitTST(Out, Reg); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +// This is ONLY used for loads into the stack pointer. +static void EmitGuardSpLoad(int I, MCInst Saved[], MCStreamer &Out) { + assert(I == 4 && + (ARM::SFI_GUARD_SP_LOAD == Saved[0].getOpcode()) && + (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[1].getOpcode()) && + (ARM::SFI_DATA_MASK == Saved[3].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + + unsigned AddrReg = Saved[0].getOperand(0).getReg(); + unsigned SpReg = Saved[3].getOperand(0).getReg(); + int64_t Pred = Saved[3].getOperand(2).getImm(); + assert((ARM::SP == SpReg) && "Unexpected register at stack guard"); + + Out.EmitBundleLock(); + EmitBICMask(Out, AddrReg, Pred, 0xC0000000); + Out.EmitInstruction(Saved[2]); + EmitBICMask(Out, SpReg, Pred, 0xC0000000); + Out.EmitBundleUnlock(); +} + +namespace llvm { +// CustomExpandInstNaClARM - +// If Inst is a NaCl pseudo instruction, emits the substitute +// expansion to the MCStreamer and returns true. +// Otherwise, returns false. +// +// NOTE: Each time this function calls Out.EmitInstruction(), it will be +// called again recursively to rewrite the new instruction being emitted. +// Care must be taken to ensure that this does not result in an infinite +// loop. Also, global state must be managed carefully so that it is +// consistent during recursive calls. +// +// We need global state to keep track of the explicit prefix (PREFIX_*) +// instructions. Unfortunately, the assembly parser prefers to generate +// these instead of combined instructions. At this time, having only +// one explicit prefix is supported. + + +bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out) { + const int MaxSaved = 4; + static MCInst Saved[MaxSaved]; + static int SaveCount = 0; + static int I = 0; + // This routine only executes if RecurseGuard == 0 + static bool RecurseGuard = false; + + // If we are emitting to .s, just emit all pseudo-instructions directly. + if (Out.hasRawTextSupport()) { + return false; + } + + //No recursive calls allowed; + if (RecurseGuard) return false; + + unsigned Opc = Inst.getOpcode(); + + DEBUG(dbgs() << "CustomExpandInstNaClARM("; Inst.dump(); dbgs() << ")\n"); + + // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of + // a stack guard in conjunction with a SFI_DATA_MASK + + // Logic: + // This is somewhat convoluted, but in the current model, the SFI + // guard pseudo instructions occur PRIOR to the actual instruction. + // So, the bundling/alignment operation has to refer to the FOLLOWING + // one or two instructions. + // + // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_* + // pseudo and the very next one or two instructions are used as arguments to + // the Emit*() functions in this file. This is the reason why we have a + // doublely nested switch here. First, to save the SFI_* pseudo, then to + // emit it and the next instruction + + // By default, we only need to save two or three instructions + + if ((I == 0) && (SaveCount == 0)) { + // Base State, no saved instructions. + // If the current instruction is a SFI instruction, set the SaveCount + // and fall through. + switch (Opc) { + default: + SaveCount = 0; // Nothing to do. + return false; // Handle this Inst elsewhere. + case ARM::SFI_NOP_IF_AT_BUNDLE_END: + SaveCount = 3; + break; + case ARM::SFI_DATA_MASK: + SaveCount = 0; // Do nothing. 
+ break; + case ARM::SFI_GUARD_CALL: + case ARM::SFI_GUARD_INDIRECT_CALL: + case ARM::SFI_GUARD_INDIRECT_JMP: + case ARM::SFI_GUARD_RETURN: + case ARM::SFI_GUARD_LOADSTORE: + case ARM::SFI_GUARD_LOADSTORE_TST: + SaveCount = 2; + break; + case ARM::SFI_GUARD_SP_LOAD: + SaveCount = 4; + break; + } + } + + if (I < SaveCount) { + // Othewise, save the current Inst and return + Saved[I++] = Inst; + if (I < SaveCount) + return true; + // Else fall through to next stat + } + + if (SaveCount > 0) { + assert(I == SaveCount && "Bookeeping Error"); + SaveCount = 0; // Reset for next iteration + // The following calls may call Out.EmitInstruction() + // which must not again call CustomExpandInst ... + // So set RecurseGuard = 1; + RecurseGuard = true; + + switch (Saved[0].getOpcode()) { + default: /* No action required */ break; + case ARM::SFI_NOP_IF_AT_BUNDLE_END: + EmitDataMask(I, Saved, Out); + break; + case ARM::SFI_DATA_MASK: + assert(0 && "Unexpected NOP_IF_AT_BUNDLE_END as a Saved Inst"); + break; + case ARM::SFI_GUARD_CALL: + EmitDirectGuardCall(I, Saved, Out); + break; + case ARM::SFI_GUARD_INDIRECT_CALL: + EmitIndirectGuardCall(I, Saved, Out); + break; + case ARM::SFI_GUARD_INDIRECT_JMP: + EmitIndirectGuardJmp(I, Saved, Out); + break; + case ARM::SFI_GUARD_RETURN: + EmitGuardReturn(I, Saved, Out); + break; + case ARM::SFI_GUARD_LOADSTORE: + EmitGuardLoadOrStore(I, Saved, Out); + break; + case ARM::SFI_GUARD_LOADSTORE_TST: + EmitGuardLoadOrStoreTst(I, Saved, Out); + break; + case ARM::SFI_GUARD_SP_LOAD: + EmitGuardSpLoad(I, Saved, Out); + break; + } + I = 0; // Reset I for next. + assert(RecurseGuard && "Illegal Depth"); + RecurseGuard = false; + return true; + } + + return false; +} + +} // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h new file mode 100644 index 0000000000..de7ed50662 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h @@ -0,0 +1,19 @@ +//===-- ARMMCNaCl.h - Prototype for CustomExpandInstNaClARM ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMMCNACL_H +#define ARMMCNACL_H + +namespace llvm { + class MCInst; + class MCStreamer; + bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out); +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 00ffc94ac7..7a57e40a17 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -89,7 +89,13 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; else // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; + // @LOCALMOD-BEGIN + // Orig: ARMArchFeature = "+v7"; + // TODO(pdox): Eliminate this strange exception, possibly + // with our own cpu tag. (neon doesn't work, but vfp2 does). + // We also don't seem to handle The DSP features. 
+ ARMArchFeature = "+v7,+db,+vfp2"; + // @LOCALMOD-END } } else if (SubVer == '6') { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') @@ -154,7 +160,18 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { if (TheTriple.isOSDarwin()) return new ARMMCAsmInfoDarwin(); - return new ARMELFMCAsmInfo(); + // @LOCALMOD-BEGIN + ARMELFMCAsmInfo *MAI = new ARMELFMCAsmInfo(); + if (TheTriple.getOS() == Triple::NativeClient) { + // NativeClient uses Dwarf exception handling + MAI->setExceptionsType(ExceptionHandling::DwarfCFI); + // Initial state of the frame ARM:SP points to cfa + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(ARM::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + } + return MAI; + // @LOCALMOD-END } static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 256599412e..3ee853c822 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMARMDesc ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp + ARMMCNaCl.cpp # LOCALMOD ARMMCTargetDesc.cpp ARMMachObjectWriter.cpp ARMELFObjectWriter.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 9a35bb6bd7..9a94c75e2f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -14,6 +14,7 @@ #include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MCTargetDesc/MipsMCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" @@ -239,6 +240,20 @@ public: OW->Write32(0); return true; } + + // @LOCALMOD-BEGIN + // FIXME! NaCl should INHERIT from MipsAsmBackend, not add to it. + unsigned getBundleSize() const { + return (OSType == Triple::NativeClient) ? 16 : 0; + } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + if (OSType == Triple::NativeClient) { + return CustomExpandInstNaClMips(Inst, Out); + } + return false; + } + // @LOCALMOD-END }; // class MipsAsmBackend } // namespace diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 5d240fe847..6ad8669d04 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -18,6 +18,13 @@ #include "llvm/Support/ErrorHandling.h" #include <list> +// @LOCALMOD-START +// TODO(petarj): HACK! Find better way to set ELF::EF_MIPS_PIC flag. +// See also file lib/MC/MCObjectFileInfo.cpp. 
+#include "llvm/Support/CodeGen.h" +extern llvm::Reloc::Model RelocModelOption; +// @LOCALMOD-END + using namespace llvm; namespace { @@ -71,6 +78,10 @@ unsigned MipsELFObjectWriter::getEFlags() const { Flag |= ELF::EF_MIPS_ARCH_64R2; else Flag |= ELF::EF_MIPS_ARCH_32R2; + /* @LOCLAMOD-START */ + if (RelocModelOption == Reloc::PIC_ || RelocModelOption == Reloc::Default) + Flag |= ELF::EF_MIPS_PIC; + /* @LOCLAMOD-END */ return Flag; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp new file mode 100644 index 0000000000..d39a60d41c --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp @@ -0,0 +1,261 @@ +//=== MipsMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-mc-nacl" + +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +/// Two helper functions for emitting the actual guard instructions + +static void EmitMask(MCStreamer &Out, + unsigned Addr, unsigned Mask) { + // and \Addr, \Addr, \Mask + MCInst MaskInst; + MaskInst.setOpcode(Mips::AND); + MaskInst.addOperand(MCOperand::CreateReg(Addr)); + MaskInst.addOperand(MCOperand::CreateReg(Addr)); + MaskInst.addOperand(MCOperand::CreateReg(Mask)); + Out.EmitInstruction(MaskInst); +} + +// This is ONLY used for sandboxing stack changes. +// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that +// it must ensure that the two instructions are in the same bundle. 
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always +// emitted in conjunction with a SFI_DATA_MASK +// +static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) { + assert(I == 3 && + (Mips::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) && + (Mips::SFI_DATA_MASK == Saved[2].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + + unsigned Addr = Saved[2].getOperand(0).getReg(); + unsigned Mask = Saved[2].getOperand(2).getReg(); + assert((Mips::SP == Addr) && "Unexpected register at stack guard"); + + Out.EmitBundleLock(); + Out.EmitInstruction(Saved[1]); + EmitMask(Out, Addr, Mask); + Out.EmitBundleUnlock(); +} + +static void EmitDirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_call_preamble ---> + // sfi_nops_to_force_slot2 + assert(I == 3 && (Mips::SFI_GUARD_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + Out.EmitBundleAlignEnd(); + Out.EmitBundleLock(); + Out.EmitInstruction(Saved[1]); + Out.EmitInstruction(Saved[2]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_indirect_call_preamble link ---> + // sfi_nops_to_force_slot1 + // sfi_code_mask \link \link \maskreg + assert(I == 3 && (Mips::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_CALL"); + + unsigned Addr = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleAlignEnd(); + Out.EmitBundleLock(); + EmitMask(Out, Addr, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitInstruction(Saved[2]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_indirect_jump_preamble link ---> + // sfi_nop_if_at_bundle_end + // sfi_code_mask \link \link \maskreg + assert(I == 2 && (Mips::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_JMP"); + unsigned Addr = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(); + EmitMask(Out, Addr, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_return_preamble reg ---> + // sfi_nop_if_at_bundle_end + // sfi_code_mask \reg \reg \maskreg + assert(I == 2 && (Mips::SFI_GUARD_RETURN == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(); + EmitMask(Out, Reg, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_load_store_preamble reg ---> + // sfi_nop_if_at_bundle_end + // sfi_data_mask \reg \reg \maskreg + assert(I == 2 && (Mips::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_LOADSTORE"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(); + EmitMask(Out, Reg, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +namespace llvm { +// CustomExpandInstNaClMips - +// If Inst is a NaCl pseudo instruction, emits the substitute +// expansion to the MCStreamer and returns true. +// Otherwise, returns false. 
+// +// NOTE: Each time this function calls Out.EmitInstruction(), it will be +// called again recursively to rewrite the new instruction being emitted. +// Care must be taken to ensure that this does not result in an infinite +// loop. Also, global state must be managed carefully so that it is +// consistent during recursive calls. +// +// We need global state to keep track of the explicit prefix (PREFIX_*) +// instructions. Unfortunately, the assembly parser prefers to generate +// these instead of combined instructions. At this time, having only +// one explicit prefix is supported. + + +bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out) { + const int MaxSaved = 4; + static MCInst Saved[MaxSaved]; + static int SaveCount = 0; + static int I = 0; + // This routine only executes if RecurseGuard == 0 + static bool RecurseGuard = false; + + // If we are emitting to .s, just emit all pseudo-instructions directly. + if (Out.hasRawTextSupport()) { + return false; + } + + //No recursive calls allowed; + if (RecurseGuard) return false; + + unsigned Opc = Inst.getOpcode(); + + DEBUG(dbgs() << "CustomExpandInstNaClMips("; Inst.dump(); dbgs() << ")\n"); + + // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of + // a stack guard in conjunction with a SFI_DATA_MASK + + // Logic: + // This is somewhat convoluted, but in the current model, the SFI + // guard pseudo instructions occur PRIOR to the actual instruction. + // So, the bundling/alignment operation has to refer to the FOLLOWING + // one or two instructions. + // + // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_* + // pseudo and the very next one or two instructions are used as arguments to + // the Emit*() functions in this file. This is the reason why we have a + // doublely nested switch here. First, to save the SFI_* pseudo, then to + // emit it and the next instruction + + // By default, we only need to save two or three instructions + + if ((I == 0) && (SaveCount == 0)) { + // Base State, no saved instructions. + // If the current instruction is a SFI instruction, set the SaveCount + // and fall through. + switch (Opc) { + default: + SaveCount = 0; // Nothing to do. + return false; // Handle this Inst elsewhere. + case Mips::SFI_NOP_IF_AT_BUNDLE_END: + case Mips::SFI_GUARD_CALL: + case Mips::SFI_GUARD_INDIRECT_CALL: + SaveCount = 3; + break; + case Mips::SFI_DATA_MASK: + SaveCount = 0; // Do nothing. + break; + case Mips::SFI_GUARD_INDIRECT_JMP: + case Mips::SFI_GUARD_RETURN: + case Mips::SFI_GUARD_LOADSTORE: + SaveCount = 2; + break; + } + } + + if (I < SaveCount) { + // Othewise, save the current Inst and return + Saved[I++] = Inst; + if (I < SaveCount) + return true; + // Else fall through to next stat + } + + if (SaveCount > 0) { + assert(I == SaveCount && "Bookeeping Error"); + SaveCount = 0; // Reset for next iteration + // The following calls may call Out.EmitInstruction() + // which must not again call CustomExpandInst ... 
+ // So set RecurseGuard = 1; + RecurseGuard = true; + + switch (Saved[0].getOpcode()) { + default: /* No action required */ break; + case Mips::SFI_NOP_IF_AT_BUNDLE_END: + EmitDataMask(I, Saved, Out); + break; + case Mips::SFI_DATA_MASK: + assert(0 && "Unexpected NOP_IF_AT_BUNDLE_END as a Saved Inst"); + break; + case Mips::SFI_GUARD_CALL: + EmitDirectGuardCall(I, Saved, Out); + break; + case Mips::SFI_GUARD_INDIRECT_CALL: + EmitIndirectGuardCall(I, Saved, Out); + break; + case Mips::SFI_GUARD_INDIRECT_JMP: + EmitIndirectGuardJmp(I, Saved, Out); + break; + case Mips::SFI_GUARD_RETURN: + EmitGuardReturn(I, Saved, Out); + break; + case Mips::SFI_GUARD_LOADSTORE: + EmitGuardLoadOrStore(I, Saved, Out); + break; + } + I = 0; // Reset I for next. + assert(RecurseGuard && "Illegal Depth"); + RecurseGuard = false; + return true; + } + return false; +} + +} // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h new file mode 100644 index 0000000000..c90502ec33 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -0,0 +1,19 @@ +//===-- MipsMCNaCl.h - Prototype for CustomExpandInstNaClMips ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMCNACL_H +#define MIPSMCNACL_H + +namespace llvm { + class MCInst; + class MCStreamer; + bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out); +} + +#endif diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 2963f7e7fa..411030aaa1 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -18,6 +18,16 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" +/* @LOCALMOD-START */ +namespace llvm { + +namespace Mips { + extern unsigned LoadStoreStackMaskReg; + extern unsigned IndirectBranchMaskReg; +} +} // End llvm namespace +/* @LOCALMOD-END */ + namespace llvm { class MipsTargetMachine; class FunctionPass; @@ -28,6 +38,10 @@ namespace llvm { FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, JITCodeEmitter &JCE); + // @LOCALMOD-START + FunctionPass *createMipsNaClRewritePass(); + // @LOCALMOD-END + } // end namespace llvm; #endif diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index bf2818d61d..9bb39a424c 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -212,13 +212,24 @@ const char *MipsAsmPrinter::getCurrentABIString() const { } void MipsAsmPrinter::EmitFunctionEntryLabel() { - if (OutStreamer.hasRawTextSupport()) { + // @LOCALMOD-START + // make sure function entry is aligned. We use XmagicX as our basis + // for alignment decisions (c.f. assembler sfi macros). 
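A small point that makes the clamp below easier to read: in this code base MF->getAlignment() and EmitAlignment() work in log2 units, so a minimum of 4 means a 1 << 4 = 16 byte boundary, one NaCl bundle. A trivial sketch of the arithmetic:

    // The clamp "if (alignment < 4) alignment = 4" is in log2 units:
    // EmitAlignment(4) pads to 1 << 4 == 16 bytes, one NaCl bundle.
    unsigned ClampToBundleAlign(unsigned Log2Align) {
      return Log2Align < 4 ? 4u : Log2Align;
    }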
+ int alignment = MF->getAlignment(); + if (alignment < 4) alignment = 4; + EmitAlignment(alignment); + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { if (Subtarget->inMips16Mode()) OutStreamer.EmitRawText(StringRef("\t.set\tmips16")); else OutStreamer.EmitRawText(StringRef("\t.set\tnomips16")); // leave out until FSF available gas has micromips changes // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips")); + OutStreamer.EmitRawText(StringRef("\t.set XmagicX, .\n")); + } + // @LOCALMOD-END + + if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); } OutStreamer.EmitLabel(CurrentFnSym); @@ -519,6 +530,10 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } +// @LOCALMOD-START +extern void EmitMipsSFIHeaders(raw_ostream &O); +// @LOCALMOD-END + void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. @@ -540,7 +555,35 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // return to previous section if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t.previous")); + + // @LOCALMOD-START + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + std::string str; + raw_string_ostream OS(str); + EmitMipsSFIHeaders(OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + // @LOCALMOD-END +} + +// @LOCALMOD-START +unsigned MipsAsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 4; + } + } + return 0; } +// @LOCALMOD-END MachineLocation MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 94d8bfa105..efed6357a4 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -82,6 +82,10 @@ public: void EmitStartOfAsmFile(Module &M); virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + // @LOCALMOD-START + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const; + // @LOCALMOD-END }; } diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index e3c8ed75cf..d014ba1792 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -141,6 +141,11 @@ FunctionPass *llvm::createMipsDelaySlotFillerPass(MipsTargetMachine &tm) { return new Filler(tm); } +// @LOCALMOD-START +extern bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx); +extern bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx); +// @LOCALMOD-END + bool Filler::findDelayInstr(MachineBasicBlock &MBB, InstrIter slot, InstrIter &Filler) { @@ -160,11 +165,18 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB, // Convert to forward iterator. 
InstrIter FI(llvm::next(I).base()); + int Dummy; // @LOCALMOD if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isLabel() || FI == LastFiller || I->isPseudo() + // @LOCALMOD-START + // Don't put in delay slot instructions that could be masked. + || IsDangerousLoad(*FI, &Dummy) + || IsDangerousStore(*FI, &Dummy) + || FI->modifiesRegister(Mips::SP, TM.getRegisterInfo()) + // @LOCALMOD-END // // Should not allow: // ERET, DERET or WAIT, PAUSE. Need to add these to instruction diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index c5fca7f4b2..778fe34275 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -390,7 +390,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasMips32r2Or64()) + Subtarget.hasMips32r2Or64() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/) return false; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e225b6c28e..32cf6c8be7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -369,6 +369,13 @@ MipsTargetLowering(MipsTargetMachine &TM) setTruncStoreAction(MVT::i64, MVT::i32, Custom); } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + } + // @LOCALMOD-END + setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); @@ -919,6 +926,10 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + // @LOCALMOD-END } return SDValue(); } @@ -1817,6 +1828,24 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); } +// @LOCALMOD-BEGIN + +// NaCl TLS setup / layout intrinsics. 
+// See: native_client/src/untrusted/nacl/tls_params.h +SDValue MipsTargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue MipsTargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} +// @LOCALMOD-END + SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1831,6 +1860,38 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const TLSModel::Model model = getTargetMachine().getTLSModel(GV); + // @LOCALMOD-BEGIN + if (getTargetMachine().getSubtarget<MipsSubtarget>().isTargetNaCl()) { + SDVTList VTs = DAG.getVTList(MVT::i32); + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_HI); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_LO); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, VTs, &TGAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, TGALo); + SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + + SDValue TlsReadTp = DAG.getExternalSymbol("__nacl_read_tp", PtrVT); + + ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, + TlsReadTp, Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + SDValue ThreadPointer = CallResult.first; + SDValue TPOffset = DAG.getConstant(0x7000, MVT::i32); + SDValue ThreadPointer2 = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, + TPOffset); + return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer2, Offset); + } + // @LOCALMOD-END + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 43f97e89a7..77045c3162 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -304,6 +304,11 @@ namespace llvm { void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC, SDValue Chain, DebugLoc DL, SelectionDAG &DAG) const; + // @LOCALMOD-BEGIN + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 33ee020689..7844df9f40 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -282,23 +282,24 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { } // Indexed loads and stores. 
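Returning to the @LOCALMOD TLS path in LowerGlobalTLSAddress above: spelled out, it computes a thread-local address in three steps, namely a call to the runtime helper for the thread pointer, the fixed MIPS TLS bias of 0x7000, and the %tprel_hi/%tprel_lo offset of the variable. A C-level sketch of the resulting computation (illustrative only; __nacl_read_tp is the helper symbol named in the code above, and its exact prototype is assumed here):

    extern "C" void *__nacl_read_tp();  // NaCl runtime helper, as referenced above

    // Address produced for a TLS variable with TP-relative offset TpRelOffset.
    static char *NaClTlsAddress(long TpRelOffset) {
      char *Tp = static_cast<char *>(__nacl_read_tp());  // plain call, no TLS reloc
      return Tp + 0x7000 + TpRelOffset;  // fixed MIPS bias, then the variable's offset
    }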
-let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { +let Predicates = [HasMips32r2Or64, IsNotNaCl/*@LOCALMOD*/] in { def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load>; def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store>; } -let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { +let Predicates = [HasMips32r2, NotMips64, IsNotNaCl/*@LOCALMOD*/] in { def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load>; def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store>; } -let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in { +let Predicates = [HasMips64, NotN64, IsNotNaCl/*@LOCALMOD*/], + DecoderNamespace="Mips64" in { def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load>; def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store>; } // n64 -let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { +let Predicates = [IsN64, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load>; def SWXC1_P8 : FPIdxStore<0x8, "swxc1", FGR32, CPU64Regs, store>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index f16b5f9ee7..3142ac94b1 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -174,6 +174,8 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, AssemblerPredicate<"FeatureMips32">; def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; +def IsNaCl : Predicate<"Subtarget.isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; class MipsPat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [HasStandardEncoding]; @@ -859,6 +861,37 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : // Pseudo instructions //===----------------------------------------------------------------------===// +// @LOCALMOD-START + +// Older Macro based SFI Model +def SFI_GUARD_LOADSTORE : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_load_store_preamble\t$dst, $src1, $src2", []>; + +def SFI_GUARD_INDIRECT_CALL : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_call_preamble\t$dst, $src1, $src2", []>; + +def SFI_GUARD_INDIRECT_JMP : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_jump_preamble\t$dst, $src1, $src2", []>; + +def SFI_GUARD_CALL : +MipsPseudo<(outs), (ins), "sfi_call_preamble", []>; + +def SFI_GUARD_RETURN : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_return_preamble\t$dst, $src1, $src2", []>; + +def SFI_NOP_IF_AT_BUNDLE_END : +MipsPseudo<(outs), (ins), "sfi_nop_if_at_bundle_end", []>; + +def SFI_DATA_MASK : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_data_mask\t$dst, $src1, $src2", []>; + +// @LOCALMOD-END + // Return RA. 
let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in def RetRA : PseudoSE<(outs), (ins), "", [(MipsRet)]>; diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 5fa6339338..d8119ff75c 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -160,3 +160,4 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } } + diff --git a/lib/Target/Mips/MipsNaClHeaders.cpp b/lib/Target/Mips/MipsNaClHeaders.cpp new file mode 100644 index 0000000000..375c287d67 --- /dev/null +++ b/lib/Target/Mips/MipsNaClHeaders.cpp @@ -0,0 +1,128 @@ +//===-- MipsNaClHeaders.cpp - Print SFI headers to an Mips .s file --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the initial header string needed +// for the Native Client target in Mips assembly. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_ostream.h" +#include "MipsNaClRewritePass.h" +#include <string> + +using namespace llvm; + +void EmitMipsSFIHeaders(raw_ostream &O) { + O << " # ========================================\n"; + O << "# Branch: " << FlagSfiBranch << "\n"; + O << "# Stack: " << FlagSfiStack << "\n"; + O << "# Store: " << FlagSfiStore << "\n"; + O << "# Load: " << FlagSfiLoad << "\n"; + + O << " # ========================================\n"; + // NOTE: this macro does bundle alignment as follows + // if current bundle pos is X emit pX data items of value "val" + // NOTE: that pos will be one of: 0,4,8,12 + // + O << + "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n" + "\t.set pos, (. 
- XmagicX) % 16\n" + "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nop_if_at_bundle_end\n" + "\tsfi_long_based_on_pos 0 0 0 1 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot3\n" + "\tsfi_long_based_on_pos 3 2 1 0 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot2\n" + "\tsfi_long_based_on_pos 2 1 0 3 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot1\n" + "\tsfi_long_based_on_pos 1 0 3 2 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << " # ========================================\n"; + O << + "\t.macro sfi_data_mask reg1 reg2 maskreg\n" + "\tand \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_code_mask reg1 reg2 maskreg\n" + "\tand \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + O << " # ========================================\n"; + if (FlagSfiBranch) { + O << + "\t.macro sfi_call_preamble\n" + "\tsfi_nops_to_force_slot2\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_return_preamble reg1 reg2 maskreg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + // This is used just before "jr" + O << + "\t.macro sfi_indirect_jump_preamble reg1 reg2 maskreg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + // This is used just before "jalr" + O << + "\t.macro sfi_indirect_call_preamble reg1 reg2 maskreg\n" + "\tsfi_nops_to_force_slot1\n" + "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + } + + if (FlagSfiStore) { + O << " # ========================================\n"; + + O << + "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_data_mask \\reg1, \\reg2 , \\maskreg\n" + "\t.endm\n" + "\n\n"; + } else { + O << + "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n" + "\t.endm\n" + "\n\n"; + } + + O << " # ========================================\n"; + O << "\t.text\n"; +} diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp new file mode 100644 index 0000000000..f675e5663a --- /dev/null +++ b/lib/Target/Mips/MipsNaClRewritePass.cpp @@ -0,0 +1,333 @@ +//===-- MipsNaClRewritePass.cpp - Native Client Rewrite Pass -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Native Client Rewrite Pass +// This final pass inserts the sandboxing instructions needed to run inside +// the Native Client sandbox. Native Client requires certain software fault +// isolation (SFI) constructions to be put in place, to prevent escape from +// the sandbox. Native Client refuses to execute binaries without the correct +// SFI sequences. 
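The sfi_nops_to_force_slot* macros emitted by EmitMipsSFIHeaders above pack four nop counts into one constant and select a nibble by the current offset within the 16-byte bundle. Written out as ordinary arithmetic, a quick check of sfi_nops_to_force_slot2 (arguments 2 1 0 3) shows that the instruction following the padding always lands at byte offset 8 of a bundle:

    // Pos is (. - XmagicX) % 16, i.e. 0, 4, 8 or 12 within the current bundle.
    unsigned NopsToForceSlot2(unsigned Pos) {
      unsigned Packed = (3u << 12) | (0u << 8) | (1u << 4) | 2u;  // 0x3012, nibbles p3..p0
      return (Packed >> Pos) & 15u;  // number of 4-byte nop words to emit
    }
    // NopsToForceSlot2(0) == 2, (4) == 1, (8) == 0, (12) == 3;
    // in every case Pos + 4 * nops is congruent to 8 modulo 16.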
+// +// Potentially dangerous operations which are protected include: +// * Stores +// * Branches +// * Changes to SP +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-sfi" +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsNaClRewritePass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +unsigned Mips::IndirectBranchMaskReg = Mips::T6; +unsigned Mips::LoadStoreStackMaskReg = Mips::T7; + +namespace { + class MipsNaClRewritePass : public MachineFunctionPass { + public: + static char ID; + MipsNaClRewritePass() : MachineFunctionPass(ID) {} + + const MipsInstrInfo *TII; + const TargetRegisterInfo *TRI; + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "Mips Native Client Rewrite Pass"; + } + + private: + + bool SandboxLoadsInBlock(MachineBasicBlock &MBB); + bool SandboxStoresInBlock(MachineBasicBlock &MBB); + void SandboxLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx); + + bool SandboxBranchesInBlock(MachineBasicBlock &MBB); + bool SandboxStackChangesInBlock(MachineBasicBlock &MBB); + + void SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + void AlignAllJumpTargets(MachineFunction &MF); + }; + char MipsNaClRewritePass::ID = 0; +} + +static bool IsReturn(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::RET); +} + +static bool IsIndirectJump(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::JR); +} + +static bool IsIndirectCall(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::JALR); +} + +static bool IsDirectCall(const MachineInstr &MI) { + return ((MI.getOpcode() == Mips::JAL) || (MI.getOpcode() == Mips::BGEZAL) + || (MI.getOpcode() == Mips::BLTZAL)); +; +} + +static bool IsStackMask(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::SFI_DATA_MASK); +} + +static bool NeedSandboxStackChange(const MachineInstr &MI, + const TargetRegisterInfo *TRI) { + if (IsDirectCall(MI) || IsIndirectCall(MI)) { + // We check this first because method modifiesRegister + // returns true for calls. + return false; + } + return (MI.modifiesRegister(Mips::SP, TRI) && !IsStackMask(MI)); +} + +void MipsNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_NOP_IF_AT_BUNDLE_END)); + + // Get to next instr (one + to get the original, and one more + to get past). 
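// Illustrative sketch (not part of the original patch): after this function runs,
// a stack-pointer update such as "addiu $sp, $sp, -32" ends up bracketed so the
// update and its mask cannot be split across a bundle boundary; with $t7 holding
// LoadStoreStackMaskReg the emitted sequence is roughly:
//   sfi_nop_if_at_bundle_end        # SFI_NOP_IF_AT_BUNDLE_END, inserted above
//   addiu $sp, $sp, -32             # the original SP-modifying instruction
//   and   $sp, $sp, $t7             # SFI_DATA_MASK, inserted just below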
+ MachineBasicBlock::iterator MBBINext = (MBBI++); + MachineBasicBlock::iterator MBBINext2 = (MBBI++); + + BuildMI(MBB, MBBINext2, MI.getDebugLoc(), + TII->get(Mips::SFI_DATA_MASK), Mips::SP) + .addReg(Mips::SP) + .addReg(Mips::LoadStoreStackMaskReg); + return; +} + +bool MipsNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ++MBBI) { + MachineInstr &MI = *MBBI; + if (NeedSandboxStackChange(MI, TRI)) { + SandboxStackChange(MBB, MBBI); + Modified = true; + } + } + return Modified; +} + +bool MipsNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ++MBBI) { + MachineInstr &MI = *MBBI; + + if (IsReturn(MI)) { + unsigned AddrReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_RETURN), AddrReg) + .addReg(AddrReg) + .addReg(Mips::IndirectBranchMaskReg); + Modified = true; + } else if (IsIndirectJump(MI)) { + unsigned AddrReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_INDIRECT_JMP), AddrReg) + .addReg(AddrReg) + .addReg(Mips::IndirectBranchMaskReg); + Modified = true; + } else if (IsDirectCall(MI)) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_CALL)); + Modified = true; + } else if (IsIndirectCall(MI)) { + unsigned AddrReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_INDIRECT_CALL), AddrReg) + .addReg(AddrReg) + .addReg(Mips::IndirectBranchMaskReg); + Modified = true; + } + } + + return Modified; +} + +/* + * Sandboxes a load or store instruction by inserting an appropriate mask + * operation before it. + */ +void MipsNaClRewritePass::SandboxLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx) { + unsigned BaseReg = MI.getOperand(AddrIdx).getReg(); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_LOADSTORE), BaseReg) + .addReg(BaseReg) + .addReg(Mips::LoadStoreStackMaskReg); + return; +} + +bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 1 + case Mips::LB: + case Mips::LBu: + case Mips::LH: + case Mips::LHu: + case Mips::LW: + case Mips::LWC1: + case Mips::LDC1: + case Mips::LL: + case Mips::LWL: + case Mips::LWR: + *AddrIdx = 1; + break; + } + + if (MI.getOperand(*AddrIdx).getReg() == Mips::SP) { + // The contents of SP do not require masking. + return false; + } + + return true; +} + +bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 1 + case Mips::SB: + case Mips::SH: + case Mips::SW: + case Mips::SWC1: + case Mips::SDC1: + case Mips::SWL: + case Mips::SWR: + *AddrIdx = 1; + break; + + case Mips::SC: + *AddrIdx = 2; + break; + } + + if (MI.getOperand(*AddrIdx).getReg() == Mips::SP) { + // The contents of SP do not require masking. 
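// ($sp can be trusted here because every instruction that modifies it is itself
// re-masked by SandboxStackChangesInBlock, so it always points into the sandbox.)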
+ return false; + } + + return true; +} + +bool MipsNaClRewritePass::SandboxLoadsInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (IsDangerousLoad(MI, &AddrIdx)) { + SandboxLoadStore(MBB, MBBI, MI, AddrIdx); + Modified = true; + } + } + return Modified; +} + +bool MipsNaClRewritePass::SandboxStoresInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (IsDangerousStore(MI, &AddrIdx)) { + SandboxLoadStore(MBB, MBBI, MI, AddrIdx); + Modified = true; + } + } + return Modified; +} + +// Make sure all jump targets are aligned +void MipsNaClRewritePass::AlignAllJumpTargets(MachineFunction &MF) { + // JUMP TABLE TARGETS + MachineJumpTableInfo *jt_info = MF.getJumpTableInfo(); + if (jt_info) { + const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables(); + for (unsigned i=0; i < JT.size(); ++i) { + std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs; + + for (unsigned j=0; j < MBBs.size(); ++j) { + MBBs[j]->setAlignment(4); + } + } + } + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + MachineBasicBlock &MBB = *I; + if (MBB.hasAddressTaken()) + MBB.setAlignment(4); + } +} + +bool MipsNaClRewritePass::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + TRI = MF.getTarget().getRegisterInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + + if (FlagSfiLoad) + Modified |= SandboxLoadsInBlock(MBB); + if (FlagSfiStore) + Modified |= SandboxStoresInBlock(MBB); + if (FlagSfiBranch) + Modified |= SandboxBranchesInBlock(MBB); + if (FlagSfiStack) + Modified |= SandboxStackChangesInBlock(MBB); + } + + if (FlagSfiBranch) + AlignAllJumpTargets(MF); + + return Modified; +} + +/// createMipsNaClRewritePass - returns an instance of the NaClRewritePass. 
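/// (Hooked into code generation from MipsTargetMachine::addPreEmitPass() when the
/// subtarget reports isTargetNaCl(); see the MipsTargetMachine.cpp hunk below.)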
+FunctionPass *llvm::createMipsNaClRewritePass() { + return new MipsNaClRewritePass(); +} diff --git a/lib/Target/Mips/MipsNaClRewritePass.h b/lib/Target/Mips/MipsNaClRewritePass.h new file mode 100644 index 0000000000..4e729ec985 --- /dev/null +++ b/lib/Target/Mips/MipsNaClRewritePass.h @@ -0,0 +1,21 @@ +//===-- MipsNaClRewritePass.h - NaCl Sandboxing Pass ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_MIPSNACLREWRITEPASS_H +#define TARGET_MIPSNACLREWRITEPASS_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiStack; + extern cl::opt<bool> FlagSfiBranch; +} + +#endif diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index d8e0dd436a..13893a1e31 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -81,7 +81,9 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const uint16_t ReservedCPURegs[] = { - Mips::ZERO, Mips::K0, Mips::K1, Mips::SP + Mips::ZERO, + Mips::T6, Mips::T7, Mips::T8, // @LOCALMOD: reserved for PNaCl use + Mips::K0, Mips::K1, Mips::SP }; static const uint16_t ReservedCPU64Regs[] = { diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 930af4dda1..1ff41ca358 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -33,6 +33,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false), HasDSP(false), HasDSPR2(false), IsAndroid(false) + // @LOCALMOD-START + , TargetTriple(TT) + // @LOCALMOD-END { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index ff69237ec2..6eeab5c351 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -100,6 +100,8 @@ protected: InstrItineraryData InstrItins; + Triple TargetTriple; // @LOCALMOD + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -152,6 +154,13 @@ public: bool hasMinMax() const { return HasMinMax; } bool hasSwap() const { return HasSwap; } bool hasBitCount() const { return HasBitCount; } + + // @LOCALMOD-BEGIN + bool isTargetNaCl() const { + return TargetTriple.getOS() == Triple::NativeClient; + } + // @LOCALMOD-END + }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 983ee21941..0ed3277306 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -116,6 +116,14 @@ bool MipsPassConfig::addPreEmitPass() { if (TM.getSubtarget<MipsSubtarget>().hasStandardEncoding()) addPass(createMipsLongBranchPass(TM)); + + // @LOCALMOD-START + if (getMipsSubtarget().isTargetNaCl()) { + // This pass does all the heavy sfi lifting. 
+ addPass(createMipsNaClRewritePass()); + } + // @LOCALMOD-END + return true; } diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 881908b82c..211e6867a7 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -38,6 +38,23 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); + // @LOCALMOD-BEGIN + // Without this the linker defined symbols __fini_array_start and + // __fini_array_end do not have useful values. c.f.: + // http://code.google.com/p/nativeclient/issues/detail?id=805 + if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) { + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + // @LOCALMOD-END } // A address must be loaded from a small section if its size is less than the @@ -67,6 +84,12 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (!Subtarget.useSmallSection()) return false; + // @LOCALMOD-BEGIN + // Do not use small section for NaCl. + if (Subtarget.isTargetNaCl()) + return false; + // @LOCALMOD-BEGIN + // Only global variables, not functions. const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); if (!GVA) diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 382571982b..ffc1d9f0d1 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -27,6 +27,7 @@ using namespace llvm; namespace llvm { bool HasDivModLibcall; bool AsmVerbosityDefault(false); + bool TLSUseCall; // @LOCALMOD } static cl::opt<bool> @@ -37,6 +38,20 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +// @LOCALMOD-BEGIN +// Use a function call to get the thread pointer for TLS accesses, +// instead of using inline code. 
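// A note on the pattern used below (a sketch based on the standard LLVM
// CommandLine API, not text from the patch): cl::opt<bool, true> together with
// cl::location(...) parses the flag into the external bool declared above, so
// target code elsewhere can test it without depending on this file:
//   extern bool TLSUseCall;                  // defined in namespace llvm above
//   if (TLSUseCall) { /* lower the TLS access via a helper call */ }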
+static cl::opt<bool, true> +EnableTLSUseCall("mtls-use-call", + cl::desc("Use a function call to get the thread pointer for TLS accesses."), + cl::location(TLSUseCall), + cl::init(false)); + +static cl::opt<bool> + ForceTLSNonPIC("force-tls-non-pic", + cl::desc("Force TLS to use non-PIC models"), + cl::init(false)); +// @LOCALMOD-END //--------------------------------------------------------------------------- // TargetMachine Class @@ -111,7 +126,8 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { bool isHidden = Var->hasHiddenVisibility(); TLSModel::Model Model; - if (isPIC && !isPIE) { + if (isPIC && !isPIE && + !ForceTLSNonPIC) { // @LOCALMOD if (isLocal || isHidden) Model = TLSModel::LocalDynamic; else diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index f4d03a602c..1b2ffb01ad 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -26,6 +26,7 @@ set(sources X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp + X86NaClRewritePass.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 1c240e52a3..8be0c5e6d7 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMX86Desc X86MCTargetDesc.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp + X86MCNaCl.cpp # LOCALMOD X86MachObjectWriter.cpp X86ELFObjectWriter.cpp X86WinCOFFObjectWriter.cpp diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 467edadc7e..2c91c8c566 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -337,8 +338,10 @@ namespace { class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; - ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU) - : X86AsmBackend(T, CPU), OSABI(_OSABI) { + Triple::OSType OSType; // @LOCALMOD: kept OSTYPE vs upstream. FIXME: remove. + ELFX86AsmBackend(const Target &T, uint8_t _OSABI, StringRef CPU, + Triple::OSType _OSType) + : X86AsmBackend(T, CPU), OSABI(_OSABI), OSType(_OSType) { HasReliableSymbolDifference = true; } @@ -346,12 +349,28 @@ public: const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section); return ES.getFlags() & ELF::SHF_MERGE; } + + // @LOCALMOD-BEGIN + // FIXME! NaCl should inherit from ELFX86AsmBackend! + unsigned getBundleSize() const { + return OSType == Triple::NativeClient ? 
32 : 0; + } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + if (OSType == Triple::NativeClient) { + return CustomExpandInstNaClX86(Inst, Out); + } + return false; + } + // @LOCALMOD-END + }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU, + Triple::OSType OSType) // @LOCALMOD: kept OSType + : ELFX86AsmBackend(T, OSABI, CPU, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, ELF::EM_386); @@ -360,8 +379,9 @@ public: class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) - : ELFX86AsmBackend(T, OSABI, CPU) {} + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU, + Triple::OSType OSType) // @LOCALMOD: kept OSType + : ELFX86AsmBackend(T, OSABI, CPU, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86ELFObjectWriter(OS, /*IsELF64*/ true, OSABI, ELF::EM_X86_64); @@ -459,7 +479,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, String return new WindowsX86AsmBackend(T, false, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ELFX86_32AsmBackend(T, OSABI, CPU); + return new ELFX86_32AsmBackend(T, OSABI, CPU, TheTriple.getOS()); } MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { @@ -472,5 +492,5 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, String return new WindowsX86AsmBackend(T, true, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ELFX86_64AsmBackend(T, OSABI, CPU); + return new ELFX86_64AsmBackend(T, OSABI, CPU, TheTriple.getOS()); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 7ea1961dec..0ce4c126c2 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -291,6 +291,8 @@ namespace X86II { /// manual, this operand is described as pntr16:32 and pntr16:16 RawFrmImm16 = 44, + CustomFrm = 62, // @LOCALMOD + FormMask = 63, //===------------------------------------------------------------------===// @@ -542,6 +544,7 @@ namespace X86II { case X86II::MRMSrcReg: case X86II::RawFrmImm8: case X86II::RawFrmImm16: + case X86II::CustomFrm: // @LOCALMOD return -1; case X86II::MRMDestMem: return 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 16488eb7ae..7706b9308e 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -76,8 +76,18 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) void X86ELFMCAsmInfo::anchor() { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { - if (T.getArch() == Triple::x86_64) - PointerSize = 8; + + // @LOCALMOD-BEGIN + if (T.getArch() == Triple::x86_64) { + if (T.getOS() == Triple::NativeClient) { + PointerSize = 4; + StackSlotSize = 8; + } else { + PointerSize = 8; + StackSlotSize = 8; + } + } + // @LOCALMOD-END AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 122204ae75..4c6036761a 100644 --- 
a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -846,7 +846,6 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const { - // Emit the lock opcode prefix as needed. if (TSFlags & X86II::LOCK) EmitByte(0xF0, CurByte, OS); @@ -1012,6 +1011,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: llvm_unreachable("Pseudo instruction shouldn't be emitted"); + // @LOCALMOD-BEGIN + case X86II::CustomFrm: + assert(0 && "CustomFrm instruction shouldn't be emitted"); + // @LOCALMOD-END case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp new file mode 100644 index 0000000000..29d87ba2c6 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp @@ -0,0 +1,700 @@ +//=== X86MCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-sandboxing" + +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86MCNaCl.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +// This option makes it possible to overwrite the x86 jmp mask immediate. +// Setting it to -1 will effectively turn masking into a nop which will +// help with linking this code with non-sandboxed libs (at least for x86-32). +cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", cl::init(-32)); + +cl::opt<bool> FlagUseZeroBasedSandbox("sfi-zero-based-sandbox", + cl::desc("Use a zero-based sandbox model" + " for the NaCl SFI."), + cl::init(false)); + +static unsigned PrefixSaved = 0; +static bool PrefixPass = false; + +// See the notes below where these functions are defined. +namespace { +unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High=false); +unsigned DemoteRegTo32_(unsigned RegIn); +} // namespace + +static void EmitDirectCall(const MCOperand &Op, bool Is64Bit, + MCStreamer &Out) { + Out.EmitBundleAlignEnd(); + Out.EmitBundleLock(); + + MCInst CALLInst; + CALLInst.setOpcode(Is64Bit ? 
X86::CALL64pcrel32 : X86::CALLpcrel32); + CALLInst.addOperand(Op); + Out.EmitInstruction(CALLInst); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall, + MCStreamer &Out) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const int JmpMask = FlagSfiX86JmpMask; + const unsigned Reg32 = Op.getReg(); + const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); + + if (IsCall) + Out.EmitBundleAlignEnd(); + + Out.EmitBundleLock(); + + MCInst ANDInst; + ANDInst.setOpcode(X86::AND32ri8); + ANDInst.addOperand(MCOperand::CreateReg(Reg32)); + ANDInst.addOperand(MCOperand::CreateReg(Reg32)); + ANDInst.addOperand(MCOperand::CreateImm(JmpMask)); + Out.EmitInstruction(ANDInst); + + if (Is64Bit && !UseZeroBasedSandbox) { + MCInst InstADD; + InstADD.setOpcode(X86::ADD64rr); + InstADD.addOperand(MCOperand::CreateReg(Reg64)); + InstADD.addOperand(MCOperand::CreateReg(Reg64)); + InstADD.addOperand(MCOperand::CreateReg(X86::R15)); + Out.EmitInstruction(InstADD); + } + + if (IsCall) { + MCInst CALLInst; + CALLInst.setOpcode(Is64Bit ? X86::CALL64r : X86::CALL32r); + CALLInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32)); + Out.EmitInstruction(CALLInst); + } else { + MCInst JMPInst; + JMPInst.setOpcode(Is64Bit ? X86::JMP64r : X86::JMP32r); + JMPInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32)); + Out.EmitInstruction(JMPInst); + } + Out.EmitBundleUnlock(); +} + +static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) { + MCInst POPInst; + POPInst.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r); + POPInst.addOperand(MCOperand::CreateReg(Is64Bit ? X86::RCX : X86::ECX)); + Out.EmitInstruction(POPInst); + + if (AmtOp) { + assert(!Is64Bit); + MCInst ADDInst; + unsigned ADDReg = X86::ESP; + ADDInst.setOpcode(X86::ADD32ri); + ADDInst.addOperand(MCOperand::CreateReg(ADDReg)); + ADDInst.addOperand(MCOperand::CreateReg(ADDReg)); + ADDInst.addOperand(*AmtOp); + Out.EmitInstruction(ADDInst); + } + + MCInst JMPInst; + JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r); + JMPInst.addOperand(MCOperand::CreateReg(X86::ECX)); + Out.EmitInstruction(JMPInst); +} + +static void EmitTrap(bool Is64Bit, MCStreamer &Out) { + // Rewrite to: + // X86-32: mov $0, 0 + // X86-64: mov $0, (%r15) + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + unsigned BaseReg = Is64Bit && !UseZeroBasedSandbox ? X86::R15 : 0; + + MCInst Tmp; + Tmp.setOpcode(X86::MOV32mi); + Tmp.addOperand(MCOperand::CreateReg(BaseReg)); // BaseReg + Tmp.addOperand(MCOperand::CreateImm(1)); // Scale + Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg + Tmp.addOperand(MCOperand::CreateImm(0)); // Offset + Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg + Tmp.addOperand(MCOperand::CreateImm(0)); // Value + + Out.EmitInstruction(Tmp); +} + +// Fix a register after being truncated to 32-bits. 
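// (Editorial sketch: once an operation has written only the low 32 bits of a
// register such as %esp or %ebp, the full 64-bit value must be re-formed as
// %r15 + low32. EmitRegFix below does this with a single lea, presumably chosen
// over an add because lea leaves EFLAGS untouched.)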
+static void EmitRegFix(unsigned Reg64, MCStreamer &Out) { + // lea (%rsp, %r15, 1), %rsp + // We do not need to add the R15 base for the zero-based sandbox model + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + if (!UseZeroBasedSandbox) { + MCInst Tmp; + Tmp.setOpcode(X86::LEA64r); + Tmp.addOperand(MCOperand::CreateReg(Reg64)); // DestReg + Tmp.addOperand(MCOperand::CreateReg(Reg64)); // BaseReg + Tmp.addOperand(MCOperand::CreateImm(1)); // Scale + Tmp.addOperand(MCOperand::CreateReg(X86::R15)); // IndexReg + Tmp.addOperand(MCOperand::CreateImm(0)); // Offset + Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg + Out.EmitInstruction(Tmp); + } +} + +static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp, + MCStreamer &Out) { + Out.EmitBundleLock(); + + MCInst Tmp; + Tmp.setOpcode(Opc); + Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); + Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); + Tmp.addOperand(ImmOp); + Out.EmitInstruction(Tmp); + + EmitRegFix(X86::RSP, Out); + Out.EmitBundleUnlock(); +} + +static void EmitSPAdj(const MCOperand &ImmOp, MCStreamer &Out) { + Out.EmitBundleLock(); + + MCInst Tmp; + Tmp.setOpcode(X86::LEA64_32r); + Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); // DestReg + Tmp.addOperand(MCOperand::CreateReg(X86::RBP)); // BaseReg + Tmp.addOperand(MCOperand::CreateImm(1)); // Scale + Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg + Tmp.addOperand(ImmOp); // Offset + Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg + Out.EmitInstruction(Tmp); + + EmitRegFix(X86::RSP, Out); + Out.EmitBundleUnlock(); +} + +static void EmitPrefix(unsigned Opc, MCStreamer &Out) { + assert(PrefixSaved == 0); + assert(PrefixPass == false); + + MCInst PrefixInst; + PrefixInst.setOpcode(Opc); + PrefixPass = true; + Out.EmitInstruction(PrefixInst); + + assert(PrefixSaved == 0); + assert(PrefixPass == false); +} + +static void EmitMoveRegReg(bool Is64Bit, unsigned ToReg, + unsigned FromReg, MCStreamer &Out) { + MCInst Move; + Move.setOpcode(Is64Bit ? X86::MOV64rr : X86::MOV32rr); + Move.addOperand(MCOperand::CreateReg(ToReg)); + Move.addOperand(MCOperand::CreateReg(FromReg)); + Out.EmitInstruction(Move); +} + +static void EmitRegTruncate(unsigned Reg64, MCStreamer &Out) { + unsigned Reg32 = getX86SubSuperRegister_(Reg64, MVT::i32); + EmitMoveRegReg(false, Reg32, Reg32, Out); +} + +static void HandleMemoryRefTruncation(MCInst *Inst, unsigned IndexOpPosition, + MCStreamer &Out) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + unsigned IndexReg = Inst->getOperand(IndexOpPosition).getReg(); + if (UseZeroBasedSandbox) { + // With the zero-based sandbox, we use a 32-bit register on the index + Inst->getOperand(IndexOpPosition).setReg(DemoteRegTo32_(IndexReg)); + } else { + EmitRegTruncate(IndexReg, Out); + } +} + +static void ShortenMemoryRef(MCInst *Inst, unsigned IndexOpPosition) { + unsigned ImmOpPosition = IndexOpPosition - 1; + unsigned BaseOpPosition = IndexOpPosition - 2; + unsigned IndexReg = Inst->getOperand(IndexOpPosition).getReg(); + // For the SIB byte, if the scale is 1 and the base is 0, then + // an equivalent setup moves index to base, and index to 0. The + // equivalent setup is optimized to remove the SIB byte in + // X86MCCodeEmitter.cpp. 
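// Concrete example (illustrative): a reference like "movl (,%rax,1), %ecx"
// (base 0, index %rax, scale 1) is rewritten to the equivalent "movl (%rax), %ecx",
// which the encoder can then emit without a SIB byte, giving a shorter encoding.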
+ if (Inst->getOperand(ImmOpPosition).getImm() == 1 && + Inst->getOperand(BaseOpPosition).getReg() == 0) { + Inst->getOperand(BaseOpPosition).setReg(IndexReg); + Inst->getOperand(IndexOpPosition).setReg(0); + } +} + +static void EmitLoad(bool Is64Bit, + unsigned DestReg, + unsigned BaseReg, + unsigned Scale, + unsigned IndexReg, + unsigned Offset, + unsigned SegmentReg, + MCStreamer &Out) { + // Load DestReg from address BaseReg + Scale * IndexReg + Offset + MCInst Load; + Load.setOpcode(Is64Bit ? X86::MOV64rm : X86::MOV32rm); + Load.addOperand(MCOperand::CreateReg(DestReg)); + Load.addOperand(MCOperand::CreateReg(BaseReg)); + Load.addOperand(MCOperand::CreateImm(Scale)); + Load.addOperand(MCOperand::CreateReg(IndexReg)); + Load.addOperand(MCOperand::CreateImm(Offset)); + Load.addOperand(MCOperand::CreateReg(SegmentReg)); + Out.EmitInstruction(Load); +} + +static bool SandboxMemoryRef(MCInst *Inst, + unsigned *IndexOpPosition) { + for (unsigned i = 0, last = Inst->getNumOperands(); i < last; i++) { + if (!Inst->getOperand(i).isReg() || + Inst->getOperand(i).getReg() != X86::PSEUDO_NACL_SEG) { + continue; + } + // Return the index register that will need to be truncated. + // The order of operands on a memory reference is always: + // (BaseReg, ScaleImm, IndexReg, DisplacementImm, SegmentReg), + // So if we found a match for a segment register value, we know that + // the index register is exactly two operands prior. + *IndexOpPosition = i - 2; + + // Remove the PSEUDO_NACL_SEG annotation. + Inst->getOperand(i).setReg(0); + return true; + } + return false; +} + +static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) { + Out.EmitBundleAlignEnd(); + Out.EmitBundleLock(); + + MCInst LeaInst; + LeaInst.setOpcode(X86::LEA32r); + LeaInst.addOperand(MCOperand::CreateReg(X86::EAX)); // DestReg + LeaInst.addOperand(Inst.getOperand(0)); // BaseReg + LeaInst.addOperand(Inst.getOperand(1)); // Scale + LeaInst.addOperand(Inst.getOperand(2)); // IndexReg + LeaInst.addOperand(Inst.getOperand(3)); // Offset + LeaInst.addOperand(Inst.getOperand(4)); // SegmentReg + Out.EmitInstruction(LeaInst); + + MCInst CALLInst; + CALLInst.setOpcode(X86::CALLpcrel32); + MCContext &context = Out.getContext(); + const MCSymbolRefExpr *expr = + MCSymbolRefExpr::Create( + context.GetOrCreateSymbol(StringRef("___tls_get_addr")), + MCSymbolRefExpr::VK_PLT, context); + CALLInst.addOperand(MCOperand::CreateExpr(expr)); + Out.EmitInstruction(CALLInst); + Out.EmitBundleUnlock(); +} + + +static void EmitREST(const MCInst &Inst, unsigned Reg32, + bool IsMem, MCStreamer &Out) { + unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); + Out.EmitBundleLock(); + if (!IsMem) { + EmitMoveRegReg(false, Reg32, Inst.getOperand(0).getReg(), Out); + } else { + unsigned IndexOpPosition; + MCInst SandboxedInst = Inst; + if (SandboxMemoryRef(&SandboxedInst, &IndexOpPosition)) { + HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out); + ShortenMemoryRef(&SandboxedInst, IndexOpPosition); + } + EmitLoad(false, + Reg32, + SandboxedInst.getOperand(0).getReg(), // BaseReg + SandboxedInst.getOperand(1).getImm(), // Scale + SandboxedInst.getOperand(2).getReg(), // IndexReg + SandboxedInst.getOperand(3).getImm(), // Offset + SandboxedInst.getOperand(4).getReg(), // SegmentReg + Out); + } + + EmitRegFix(Reg64, Out); + Out.EmitBundleUnlock(); +} + + +namespace llvm { +// CustomExpandInstNaClX86 - +// If Inst is a NaCl pseudo instruction, emits the substitute +// expansion to the MCStreamer and returns true. 
+// Otherwise, returns false. +// +// NOTE: Each time this function calls Out.EmitInstruction(), it will be +// called again recursively to rewrite the new instruction being emitted. +// Care must be taken to ensure that this does not result in an infinite +// loop. Also, global state must be managed carefully so that it is +// consistent during recursive calls. +// +// We need global state to keep track of the explicit prefix (PREFIX_*) +// instructions. Unfortunately, the assembly parser prefers to generate +// these instead of combined instructions. At this time, having only +// one explicit prefix is supported. +bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + // If we are emitting to .s, just emit all pseudo-instructions directly. + if (Out.hasRawTextSupport()) { + return false; + } + unsigned Opc = Inst.getOpcode(); + DEBUG(dbgs() << "CustomExpandInstNaClX86("; Inst.dump(); dbgs() << ")\n"); + switch (Opc) { + case X86::LOCK_PREFIX: + case X86::REP_PREFIX: + case X86::REPNE_PREFIX: + case X86::REX64_PREFIX: + // Ugly hack because LLVM AsmParser is not smart enough to combine + // prefixes back into the instruction they modify. + if (PrefixPass) { + PrefixPass = false; + PrefixSaved = 0; + return false; + } + assert(PrefixSaved == 0); + PrefixSaved = Opc; + return true; + case X86::NACL_TRAP32: + assert(PrefixSaved == 0); + EmitTrap(false, Out); + return true; + case X86::NACL_TRAP64: + assert(PrefixSaved == 0); + EmitTrap(true, Out); + return true; + case X86::NACL_CALL32d: + assert(PrefixSaved == 0); + EmitDirectCall(Inst.getOperand(0), false, Out); + return true; + case X86::NACL_CALL64d: + assert(PrefixSaved == 0); + EmitDirectCall(Inst.getOperand(0), true, Out); + return true; + case X86::NACL_CALL32r: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), false, true, Out); + return true; + case X86::NACL_CALL64r: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), true, true, Out); + return true; + case X86::NACL_JMP32r: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), false, false, Out); + return true; + case X86::NACL_TLS_addr32: + assert(PrefixSaved == 0); + EmitTLSAddr32(Inst, Out); + return true; + case X86::NACL_JMP64r: + case X86::NACL_JMP64z: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), true, false, Out); + return true; + case X86::NACL_RET32: + assert(PrefixSaved == 0); + EmitRet(NULL, false, Out); + return true; + case X86::NACL_RET64: + assert(PrefixSaved == 0); + EmitRet(NULL, true, Out); + return true; + case X86::NACL_RETI32: + assert(PrefixSaved == 0); + EmitRet(&Inst.getOperand(0), false, Out); + return true; + case X86::NACL_ASPi8: + assert(PrefixSaved == 0); + EmitSPArith(X86::ADD32ri8, Inst.getOperand(0), Out); + return true; + case X86::NACL_ASPi32: + assert(PrefixSaved == 0); + EmitSPArith(X86::ADD32ri, Inst.getOperand(0), Out); + return true; + case X86::NACL_SSPi8: + assert(PrefixSaved == 0); + EmitSPArith(X86::SUB32ri8, Inst.getOperand(0), Out); + return true; + case X86::NACL_SSPi32: + assert(PrefixSaved == 0); + EmitSPArith(X86::SUB32ri, Inst.getOperand(0), Out); + return true; + case X86::NACL_ANDSPi32: + assert(PrefixSaved == 0); + EmitSPArith(X86::AND32ri, Inst.getOperand(0), Out); + return true; + case X86::NACL_SPADJi32: + assert(PrefixSaved == 0); + EmitSPAdj(Inst.getOperand(0), Out); + return true; + case X86::NACL_RESTBPm: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::EBP, true, Out); + 
return true; + case X86::NACL_RESTBPr: + case X86::NACL_RESTBPrz: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::EBP, false, Out); + return true; + case X86::NACL_RESTSPm: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::ESP, true, Out); + return true; + case X86::NACL_RESTSPr: + case X86::NACL_RESTSPrz: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::ESP, false, Out); + return true; + } + + unsigned IndexOpPosition; + MCInst SandboxedInst = Inst; + if (SandboxMemoryRef(&SandboxedInst, &IndexOpPosition)) { + unsigned PrefixLocal = PrefixSaved; + PrefixSaved = 0; + + if (PrefixLocal || !UseZeroBasedSandbox) + Out.EmitBundleLock(); + + HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out); + ShortenMemoryRef(&SandboxedInst, IndexOpPosition); + + if (PrefixLocal) + EmitPrefix(PrefixLocal, Out); + Out.EmitInstruction(SandboxedInst); + + if (PrefixLocal || !UseZeroBasedSandbox) + Out.EmitBundleUnlock(); + return true; + } + + if (PrefixSaved) { + unsigned PrefixLocal = PrefixSaved; + PrefixSaved = 0; + EmitPrefix(PrefixLocal, Out); + } + return false; +} + +} // namespace llvm + + + + +// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +// +// This is an exact copy of getX86SubSuperRegister from X86RegisterInfo.h +// We cannot use the original because it is part of libLLVMX86CodeGen, +// which cannot be a dependency of this module (libLLVMX86Desc). +// +// However, in all likelyhood, the real getX86SubSuperRegister will +// eventually be moved to MCTargetDesc, and then this copy can be +// removed. + +namespace { +unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High) { + switch (VT.getSimpleVT().SimpleTy) { + default: return Reg; + case MVT::i8: + if (High) { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AH; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DH; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CH; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BH; + } + } else { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AL; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DL; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CL; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BL; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SIL; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DIL; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BPL; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SPL; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8B; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9B; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10B; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11B; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12B; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13B; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14B; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15B; + 
} + } + case MVT::i16: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8W; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9W; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10W; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11W; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12W; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13W; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14W; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15W; + } + case MVT::i32: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::EAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::EDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::ECX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::EBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::ESI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::EDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::EBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::ESP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8D; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9D; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10D; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11D; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12D; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13D; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14D; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15D; + } + case MVT::i64: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::RAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::RDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::RCX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::RBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::RSI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::RDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::RBP; + case X86::SPL: case X86::SP: case 
X86::ESP: case X86::RSP: + return X86::RSP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15; + } + } + + return Reg; +} + +// This is a copy of DemoteRegTo32 from X86NaClRewritePass.cpp. +// We cannot use the original because it uses part of libLLVMX86CodeGen, +// which cannot be a dependency of this module (libLLVMX86Desc). +// Note that this function calls getX86SubSuperRegister_, which is +// also a copied function for the same reason. + +unsigned DemoteRegTo32_(unsigned RegIn) { + if (RegIn == 0) + return 0; + unsigned RegOut = getX86SubSuperRegister_(RegIn, MVT::i32, false); + assert(RegOut != 0); + return RegOut; +} +} //namespace +// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.h b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h new file mode 100644 index 0000000000..01b400d4d9 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h @@ -0,0 +1,19 @@ +//===-- X86MCNaCl.h - Prototype for CustomExpandInstNaClX86 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86MCNACL_H +#define X86MCNACL_H + +namespace llvm { + class MCInst; + class MCStreamer; + bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out); +} + +#endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index dce5b4d2b0..cbdfeaedbe 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -47,6 +47,11 @@ FunctionPass *createCleanupLocalDynamicTLSPass(); /// FunctionPass *createX86FloatingPointStackifierPass(); +// @LOCALMOD-BEGIN - Creates a pass to make instructions follow NaCl SFI rules. +FunctionPass* createX86NaClRewritePass(); +FunctionPass* createX86NaClRewriteFinalPass(); +// @LOCALMOD-END + /// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions /// before each call to avoid transition penalty between functions encoded with /// AVX and SSE. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index fdd712520b..9a63060c90 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -71,6 +71,35 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } +// @LOCALMOD-BEGIN +bool X86AsmPrinter::UseReadOnlyJumpTables() const { + return Subtarget->isTargetNaCl(); +} + +unsigned X86AsmPrinter::GetTargetBasicBlockAlign() const { + if (Subtarget->isTargetNaCl()) + return 5; + return 0; +} + +unsigned X86AsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. 
+ // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 5; + } + } + return 0; +} +// @LOCALMOD-END + /// printSymbolOperand - Print a raw symbol reference operand. This handles /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 61eb14e036..b166a531e1 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -46,6 +46,12 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; + virtual bool UseReadOnlyJumpTables() const; // @LOCALMOD + + virtual unsigned GetTargetBasicBlockAlign() const; // @LOCLAMOD + + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const;//@LOCALMOD + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 44db563818..ee6408b403 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-emitter" +#define DEBUG_TYPE "jit" #include "X86InstrInfo.h" #include "X86JITInfo.h" #include "X86Subtarget.h" @@ -34,6 +34,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetOpcodes.h" // @LOCALMOD using namespace llvm; STATISTIC(NumEmitted, "Number of machine instructions emitted"); @@ -1120,6 +1121,28 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, unsigned Opcode = Desc->Opcode; + // @LOCALMOD-START + if (TM.getSubtargetImpl()->isTargetNaCl()) { + switch (Opcode) { + case TargetOpcode::BUNDLE_LOCK: + MCE.beginBundleLock(); + return; + case TargetOpcode::BUNDLE_UNLOCK: + MCE.endBundleLock(); + return; + case TargetOpcode::BUNDLE_ALIGN_START: + MCE.alignToBundleBeginning(); + return; + case TargetOpcode::BUNDLE_ALIGN_END: + MCE.alignToBundleEnd(); + return; + } + // In addition to groups of instructions, each instruction must itself be + // bundle-locked because they are emitted with multiple calls into MCE + MCE.beginBundleLock(); + } + // @LOCALMOD-END + // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; @@ -1479,5 +1502,11 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, llvm_unreachable(0); } + // @LOCALMOD-START + if (TM.getSubtargetImpl()->isTargetNaCl()) { + MCE.endBundleLock(); + } + // @LOCALMOD-END + MCE.processDebugLoc(MI.getDebugLoc(), false); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index d4627c74cb..ad652366ad 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -580,6 +580,20 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // If all else fails, try to materialize the value in a register. 
if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { + // @LOCALMOD-START + if (Subtarget->isTargetNaCl()) { + // We can materialize into a memory address only if + // no registers have been defined (and hence, we + // aren't modifying an existing memory reference). + if ((AM.Base.Reg == 0) && (AM.IndexReg == 0)) { + // Put into index register so that the NaCl rewrite pass will + // convert this to a 64-bit address. + AM.IndexReg = getRegForValue(V); + return AM.IndexReg != 0; + } + return false; + } + // @LOCALMOD-END if (AM.Base.Reg == 0) { AM.Base.Reg = getRegForValue(V); return AM.Base.Reg != 0; @@ -818,9 +832,16 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); + // @LOCALMOD-BEGIN -- Ensure that the register classes match. + // At this point, SRetReturnReg is EDI, because PointerTy() for NaCl + // is i32. We then copy to EAX instead of RAX. Alternatively, we could + // have zero-extended EDI to RDI then copy to RAX, but this has a smaller + // encoding (2 bytes vs 3 bytes). + unsigned CopyTo = Subtarget->has64BitPointers() ? X86::RAX : X86::EAX; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - X86::RAX).addReg(Reg); - MRI.addLiveOut(X86::RAX); + CopyTo).addReg(Reg); + MRI.addLiveOut(CopyTo); + // @LOCALMOD-END } // Now emit the RET. @@ -1832,10 +1853,21 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (CalleeOp) { // Register-indirect call. unsigned CallOpc; - if (Subtarget->is64Bit()) - CallOpc = X86::CALL64r; - else - CallOpc = X86::CALL32r; + // @LOCALMOD-BEGIN + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL64r; + } else { + CallOpc = X86::CALL64r; + } + } else { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL32r; + } else { + CallOpc = X86::CALL32r; + } + } + // @LOCALMOD-END MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addReg(CalleeOp); @@ -1843,10 +1875,21 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Direct call. assert(GV && "Not a direct call"); unsigned CallOpc; - if (Subtarget->is64Bit()) - CallOpc = X86::CALL64pcrel32; - else - CallOpc = X86::CALLpcrel32; + // @LOCALMOD-BEGIN + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL64pcrel32; + } else { + CallOpc = X86::CALL64pcrel32; + } + } else { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALLpcrel32; + } else { + CallOpc = X86::CALLpcrel32; + } + } + // @LOCALMOD-END // See if we need any target-specific flags on the GV operand. 
unsigned char OpFlags = 0; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 369589d469..5bfb5054b0 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -114,6 +114,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: + case X86::NACL_CG_TCRETURNdi64: // @LOCALMOD + case X86::NACL_CG_TCRETURNri64: // @LOCALMOD case X86::TCRETURNmi64: case X86::EH_RETURN: case X86::EH_RETURN64: { @@ -994,6 +996,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: + case X86::NACL_CG_TCRETURNdi64: // @LOCALMOD + case X86::NACL_CG_TCRETURNri64: // @LOCALMOD case X86::EH_RETURN: case X86::EH_RETURN64: break; // These are ok @@ -1086,6 +1090,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::NACL_CG_TCRETURNri64 || // @LOCALMOD + RetOpcode == X86::NACL_CG_TCRETURNdi64 || // @LOCALMOD RetOpcode == X86::TCRETURNmi64) { bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; // Tail call return: adjust the stack pointer and jump to callee. @@ -1111,10 +1117,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { + if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::NACL_CG_TCRETURNdi64) { // @LOCALMOD + // @LOCALMOD-BEGIN + unsigned TailJmpOpc; + switch (RetOpcode) { + case X86::TCRETURNdi : TailJmpOpc = X86::TAILJMPd; break; + case X86::TCRETURNdi64: TailJmpOpc = X86::TAILJMPd64; break; + case X86::NACL_CG_TCRETURNdi64: + TailJmpOpc = X86::NACL_CG_TAILJMPd64; + break; + default: llvm_unreachable("Unexpected return opcode"); + } + // @LOCALMOD-END MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? X86::TAILJMPd : X86::TAILJMPd64)); + BuildMI(MBB, MBBI, DL, TII.get(TailJmpOpc)); // @LOCALMOD + if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1132,6 +1150,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). addReg(JumpTarget.getReg(), RegState::Kill); +// @LOCALMOD-BEGIN + } else if (RetOpcode == X86::NACL_CG_TCRETURNri64) { + BuildMI(MBB, MBBI, DL, TII.get(X86::NACL_CG_TAILJMPr64)). + addReg(JumpTarget.getReg(), RegState::Kill); +// @LOCALMOD-END } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index dc515dc39c..d46c41f508 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -29,7 +29,8 @@ public: explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti) : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), - (sti.is64Bit() ? -8 : -4)), + (sti.is64Bit() ? -8 : -4), + 1, (sti.is64Bit() ? 
8 : 4)), // @LOCALMOD TM(tm), STI(sti) { } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 99f557417b..42134256e3 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -41,6 +41,7 @@ using namespace llvm; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); + //===----------------------------------------------------------------------===// // Pattern Matcher Implementation //===----------------------------------------------------------------------===// @@ -214,6 +215,10 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); + // @LOCALMOD-BEGIN + void LegalizeAddressingModeForNaCl(SDValue N, X86ISelAddressMode &AM); + // @LOCALMOD-END + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, @@ -231,8 +236,9 @@ namespace { inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { + EVT MemOpVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; // @LOCALMOD Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, MemOpVT) : // @LOCALMOD AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; @@ -292,6 +298,15 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } + + // @LOCALMOD-START + bool selectingMemOp; + bool RestrictUseOfBaseReg() { + return selectingMemOp && Subtarget->isTargetNaCl64(); + } + // @LOCALMOD-END + + }; } @@ -442,6 +457,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && + !Subtarget->isTargetNaCl() && // @LOCALMOD: We can't fold load/call (N->getOpcode() == X86ISD::CALL || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be foled into TC_RETURN. @@ -598,6 +614,14 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); + // @LOCALMOD-START + // Disable this tls access optimization in Native Client, since + // gs:0 (or fs:0 on X86-64) does not exactly contain its own address. + if (Subtarget->isTargetNaCl()) { + return true; + } + // @LOCALMOD-END + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -722,6 +746,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { if (MatchAddressRecursively(N, AM, 0)) return true; + + if (!RestrictUseOfBaseReg()) { // @LOCALMOD // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && @@ -730,7 +756,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } - + } // @LOCALMOD + // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, // because it has a smaller encoding. // TODO: Which other code models can use this? 
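(Editorial note, an illustrative sketch rather than part of the patch: the
RestrictUseOfBaseReg() guards above keep the base-register slot of a memory
operand free when selecting NaCl x86-64 memory operands. The later rewrite needs
that slot for the sandbox base in %r15 and can only zero-extend the index, so a
plain 32-bit pointer dereference becomes roughly:
    movl %esi, %esi              # clear the upper 32 bits of the untrusted index
    movl (%r15,%rsi,1), %eax     # %r15 supplies the sandbox base
)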
@@ -1077,6 +1104,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // FALL THROUGH case ISD::MUL: case X86ISD::MUL_IMM: + // @LOCALMOD + if (!RestrictUseOfBaseReg()) { // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() == 0 && @@ -1109,6 +1138,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; } } + } // @LOCALMOD break; case ISD::SUB: { @@ -1195,6 +1225,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; AM = Backup; + if (!RestrictUseOfBaseReg()) { // @LOCALMOD // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least // the add. @@ -1207,6 +1238,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.Scale = 1; return false; } + } // @LOCALMOD N = Handle.getValue(); break; } @@ -1266,7 +1298,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the /// specified addressing mode without any further recursion. bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { - // Is the base register already occupied? + if (RestrictUseOfBaseReg()) { // @LOCALMOD + if (AM.IndexReg.getNode() == 0) { + AM.IndexReg = N; + AM.Scale = 1; + return false; + } + return true; + } // @LOCALMOD +// Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. if (AM.IndexReg.getNode() == 0) { @@ -1296,6 +1336,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; + // @LOCALMOD + selectingMemOp = true; if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand @@ -1317,7 +1359,14 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (MatchAddress(N, AM)) return false; - EVT VT = N.getValueType(); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) { + LegalizeAddressingModeForNaCl(N, AM); + } + // @LOCALMOD-END + + EVT VT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; // @LOCALMOD + if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base_Reg.getNode()) AM.Base_Reg = CurDAG->getRegister(0, VT); @@ -1327,6 +1376,32 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, AM.IndexReg = CurDAG->getRegister(0, VT); getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + + // @LOCALMOD-BEGIN + // For Native Client 64-bit, zero-extend 32-bit pointers + // to 64-bits for memory operations. Most of the time, this + // won't generate any additional instructions because the backend + // knows that operations on 32-bit registers implicitly zero-extends. + // If we don't do this, there are a few corner cases where LLVM might + // assume the upper bits won't be modified or used, but since we + // always clear the upper bits, this is not a good assumption. 
+ // http://code.google.com/p/nativeclient/issues/detail?id=1564 + if (Subtarget->isTargetNaCl64()) { + assert(Base.getValueType() == MVT::i64 && "Unexpected base operand size"); + + if (Index.getValueType() != MVT::i64) { + Index = CurDAG->getZExtOrTrunc(Index, Index.getDebugLoc(), MVT::i64); + // Insert the new node into the topological ordering. + if (Parent && + (Index->getNodeId() == -1 || + Index->getNodeId() > Parent->getNodeId())) { + CurDAG->RepositionNode(Parent, Index.getNode()); + Index->setNodeId(Parent->getNodeId()); + } + } + } + // @LOCALMOD-END + return true; } @@ -1389,6 +1464,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, SDValue Copy = AM.Segment; SDValue T = CurDAG->getRegister(0, MVT::i32); AM.Segment = T; + // @LOCALMOD + selectingMemOp = false; if (MatchAddress(N, AM)) return false; assert (T == AM.Segment); @@ -1452,7 +1529,8 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); AM.SymbolFlags = GA->getTargetFlags(); - if (N.getValueType() == MVT::i32) { + if (N.getValueType() == MVT::i32 && + !Subtarget->isTargetNaCl64()) { // @LOCALMOD AM.Scale = 1; AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); } else { @@ -1477,6 +1555,141 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, N.getOperand(1), Base, Scale, Index, Disp, Segment); } +// @LOCALMOD-BEGIN +// LegalizeAddressingModeForNaCl - NaCl specific addressing fixes. This ensures +// two addressing mode invariants. +// +// case 1. Addressing using only a displacement (constant address references) +// is only legal when the displacement is positive. This is because, when +// later we replace +// movl 0xffffffff, %eax +// by +// movl 0xffffffff(%r15), %eax +// the displacement becomes a negative offset from %r15, making this a +// reference to the guard region below %r15 rather than to %r15 + 4GB - 1, +// as the programmer expected. To handle these cases we pull negative +// displacements out whenever there is no base or index register in the +// addressing mode. I.e., the above becomes +// movl $0xffffffff, %ebx +// movl %rbx, %rbx +// movl (%r15, %rbx, 1), %eax +// +// case 2. Because NaCl needs to zero the top 32-bits of the index, we can't +// allow the index register to be negative. However, if we are using a base +// frame index, global address or the constant pool, and AM.Disp > 0, then +// negative values of "index" may be expected to legally occur. +// To avoid this, we fold the displacement (and scale) back into the +// index. This results in a LEA before the current instruction. +// Unfortunately, this may add a requirement for an additional register. +// +// For example, this sandboxed code is broken if %eax is negative: +// +// movl %eax,%eax +// incl -30(%rbp,%rax,4) +// +// Instead, we now generate: +// leal -30(%rbp,%rax,4), %tmp +// movl %tmp,%tmp +// incl (%r15,%tmp,1) +// +// TODO(espindola): This might not be complete since the matcher can select +// any dag node to go in the index. This is also not how the rest of the +// matcher logic works, if the matcher selects something, it must be +// valid and not depend on further patching. A more desirable fix is +// probably to update the matching code to avoid assigning a register +// to a value that we cannot prove is positive. +void X86DAGToDAGISel::LegalizeAddressingModeForNaCl(SDValue N, + X86ISelAddressMode &AM) { + + + // RIP-relative addressing is always fine. 
+ if (AM.isRIPRelative()) + return; + + DebugLoc dl = N->getDebugLoc(); + // Case 1 above: + if (!AM.hasBaseOrIndexReg() && !AM.hasSymbolicDisplacement() && AM.Disp < 0) { + SDValue Imm = CurDAG->getTargetConstant(AM.Disp, MVT::i32); + SDValue MovNode = + SDValue(CurDAG->getMachineNode(X86::MOV32ri, dl, MVT::i32, Imm), 0); + AM.IndexReg = MovNode; + AM.Disp = 0; + InsertDAGNode(*CurDAG, N, MovNode); + return; + } + + // MatchAddress wants to use the base register when there's only + // one register and no scale. We need to use the index register instead. + if (AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() && + !AM.IndexReg.getNode()) { + AM.IndexReg = AM.Base_Reg; + AM.setBaseReg(SDValue()); + } + + // Case 2 above comprises two sub-cases: + // sub-case 1: Prevent negative indexes + bool NeedsFixing1 = + (AM.BaseType == X86ISelAddressMode::FrameIndexBase || AM.GV || AM.CP) && + AM.IndexReg.getNode() && + AM.Disp > 0; + + // sub-case 2: Both index and base registers are being used + bool NeedsFixing2 = + (AM.BaseType == X86ISelAddressMode::RegBase) && + AM.Base_Reg.getNode() && + AM.IndexReg.getNode(); + + if (!NeedsFixing1 && !NeedsFixing2) + return; + + static const unsigned LogTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + assert(AM.Scale < sizeof(LogTable)/sizeof(LogTable[0])); + unsigned ScaleLog = LogTable[AM.Scale]; + assert(ScaleLog <= 3); + SmallVector<SDNode*, 8> NewNodes; + + SDValue NewIndex = AM.IndexReg; + if (ScaleLog > 0) { + SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); + NewNodes.push_back(ShlCount.getNode()); + SDValue ShlNode = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), + NewIndex, ShlCount); + NewNodes.push_back(ShlNode.getNode()); + NewIndex = ShlNode; + } + if (AM.Disp > 0) { + SDValue DispNode = CurDAG->getConstant(AM.Disp, N.getValueType()); + NewNodes.push_back(DispNode.getNode()); + + SDValue AddNode = CurDAG->getNode(ISD::ADD, dl, N.getValueType(), + NewIndex, DispNode); + NewNodes.push_back(AddNode.getNode()); + NewIndex = AddNode; + } + + if (NeedsFixing2) { + SDValue AddBase = CurDAG->getNode(ISD::ADD, dl, N.getValueType(), + NewIndex, AM.Base_Reg); + NewNodes.push_back(AddBase.getNode()); + NewIndex = AddBase; + AM.setBaseReg(SDValue()); + } + AM.Disp = 0; + AM.Scale = 1; + AM.IndexReg = NewIndex; + + // Insert the new nodes into the topological ordering. + for (unsigned i=0; i < NewNodes.size(); i++) { + if (NewNodes[i]->getNodeId() == -1 || + NewNodes[i]->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), NewNodes[i]); + NewNodes[i]->setNodeId(N.getNode()->getNodeId()); + } + } +} +// @LOCALMOD-END + /// getGlobalBaseReg - Return an SDNode that returns the value of /// the global base register. Output instructions required to /// initialize the global base register, if necessary. 
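The long @LOCALMOD comment above states the two NaCl x86-64 addressing invariants in terms of rewritten assembly. The same rule can be stated arithmetically: every memory access must resolve to the sandbox base in %r15 plus a value that is computed in 32 bits and then zero-extended, which is why negative displacements and possibly-negative indexes are folded back into the 32-bit computation first. Below is a minimal standalone sketch of that rule; the helper name nacl_effective_address is purely illustrative and not an LLVM API.

#include <cassert>
#include <cstdint>

// Model of the NaCl x86-64 effective-address rule enforced above:
// the untrusted part of the address is computed in 32 bits and
// zero-extended, then added to the sandbox base held in %r15.
static uint64_t nacl_effective_address(uint64_t r15, uint32_t base,
                                       uint32_t index, uint32_t scale,
                                       int32_t disp) {
  uint32_t untrusted = base + index * scale + static_cast<uint32_t>(disp);
  return r15 + static_cast<uint64_t>(untrusted);  // always a zero-extension
}

int main() {
  const uint64_t r15 = 0x7f0000000000ULL;  // hypothetical sandbox base
  // Case 1 from the comment: a "negative" displacement such as 0xffffffff
  // must resolve to r15 + 4GiB - 1, never to a negative offset below r15.
  assert(nacl_effective_address(r15, 0, 0, 1, -1) == r15 + 0xffffffffULL);
  return 0;
}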
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1f729e3133..5610bb5ba3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -146,6 +146,12 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { if (Subtarget->isTargetLinux()) return new X86LinuxTargetObjectFile(); + + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) + return new TargetLoweringObjectFileNaCl(); + // @LOCALMOD-END + if (Subtarget->isTargetELF()) return new TargetLoweringObjectFileELF(); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) @@ -158,6 +164,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); + // @LOCALMOD-START + X86StackPtr = Subtarget->has64BitPointers() ? X86::RSP : X86::ESP; + // @LOCALMOD-END RegInfo = TM.getRegisterInfo(); TD = getDataLayout(); @@ -179,7 +188,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setSchedulingPreference(Sched::ILP); else setSchedulingPreference(Sched::RegPressure); - setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); + setStackPointerRegisterToSaveRestore(X86StackPtr); // @LOCALMOD // Bypass i32 with i8 on Atom when compiling with O2 if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) @@ -542,7 +551,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - if (Subtarget->is64Bit()) { + if (Subtarget->has64BitPointers()) { setExceptionPointerRegister(X86::RAX); setExceptionSelectorRegister(X86::RDX); } else { @@ -573,13 +582,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Custom); else if (TM.Options.EnableSegmentedStacks) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Custom); else - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? 
// @LOCALMOD MVT::i64 : MVT::i32, Expand); if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { @@ -1273,6 +1285,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TARGET_ARCH, MVT::i32, Custom); + } + // @LOCALMOD-END + computeRegisterProperties(); // On Darwin, -Os means optimize for size without hurting performance, @@ -1617,7 +1637,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl()) { + // NaCl 64 uses 32-bit pointers, so there might be some zero-ext needed. + SDValue Zext = DAG.getZExtOrTrunc(Val, dl, MVT::i64); + Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Zext, Flag); + } else { + Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + } + // @LOCALMOD-END + Flag = Chain.getValue(1); // RAX now acts like a return value. @@ -1981,7 +2010,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + // @LOCALMOD + Reg = MF.getRegInfo().createVirtualRegister( + getRegClassFor(getPointerTy())); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); @@ -2350,7 +2381,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, // @LOCALMOD getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); @@ -2440,7 +2471,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, - RegInfo->getStackRegister(), + X86StackPtr, // @LOCALMOD getPointerTy()); Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); @@ -3049,7 +3080,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, // @LOCALMOD + Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32); } @@ -7563,7 +7595,8 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { + unsigned char OperandFlags, + unsigned Opcode = X86ISD::TLSADDR) { // @LOCALMOD MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = GA->getDebugLoc(); @@ -7571,16 +7604,12 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, GA->getValueType(0), GA->getOffset(), OperandFlags); - - X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; - if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(Opcode, dl, NodeTys, Ops, 3); // @LOCALMOD } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(Opcode, dl, NodeTys, Ops, 2); // @LOCALMOD } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. @@ -7612,6 +7641,52 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, X86::RAX, X86II::MO_TLSGD); } +// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model. +static SDValue +LowerToTLSExecCall(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model, bool is64Bit) { + + // See: http://code.google.com/p/nativeclient/issues/detail?id=1685 + unsigned char TargetFlag; + unsigned Opcode; + if (model == TLSModel::LocalExec) { + TargetFlag = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; + Opcode = X86ISD::TLSADDR_LE; + } else if (model == TLSModel::InitialExec) { + TargetFlag = is64Bit ? X86II::MO_GOTTPOFF : X86II::MO_INDNTPOFF; + Opcode = X86ISD::TLSADDR_IE; + } else { + llvm_unreachable("Unknown TLS model"); + } + + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::EAX, // PtrVT is 32-bit. + TargetFlag, Opcode); +} + +// @LOCALMOD-START +// Lower TLS accesses to a function call, rather than use segment registers. +// Lower ISD::GlobalTLSAddress for NaCl 64 bit. +static SDValue +LowerToTLSNaCl64(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model) { + + // See: http://code.google.com/p/nativeclient/issues/detail?id=1685 + unsigned char TargetFlag; + unsigned Opcode; + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + TargetFlag = X86II::MO_TLSGD; + Opcode = X86ISD::TLSADDR; + } else { + return LowerToTLSExecCall(GA, DAG, PtrVT, model, true); + } + + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::EAX, // PtrVT is 32-bit. 
+ TargetFlag, Opcode); +} +// @LOCALMOD-END + static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, @@ -7626,14 +7701,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDValue Base; if (is64Bit) { Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, - X86II::MO_TLSLD, /*LocalDynamic=*/true); + X86II::MO_TLSLD, + /*Opcode=*/X86ISD::TLSBASEADDR); // @LOCALMOD } else { SDValue InFlag; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, - X86II::MO_TLSLDM, /*LocalDynamic=*/true); + X86II::MO_TLSLDM, + /*Opcode=*/X86ISD::TLSBASEADDR); // @LOCALMOD } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations @@ -7717,6 +7794,11 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->isTargetELF()) { TLSModel::Model model = getTargetMachine().getTLSModel(GV); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) + return LowerToTLSNaCl64(GA, DAG, getPointerTy(), model); + // @LOCALMOD-END + switch (model) { case TLSModel::GeneralDynamic: if (Subtarget->is64Bit()) @@ -7727,9 +7809,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + // @LOCALMOD-START + if (llvm::TLSUseCall) { + return LowerToTLSExecCall(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); + } else { + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), getTargetMachine().getRelocationModel() == Reloc::PIC_); + } + // @LOCALMOD-END } llvm_unreachable("Unknown TLS model."); } @@ -8667,13 +8756,31 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, break; } + // @LOCALMOD-BEGIN + // This function only peeks at the data dependencies of the DAG to find + // an arith op that also defines EFLAGS. However, function calls may + // clobber EFLAGS and the data dependencies do not show that. + // When that occurs, EFLAGS must be copied via PUSHF and POPF. + // The problem is that NaCl does not allow PUSHF and POPF. + // We could try to detect such clobbers for NaCl, but for now, we + // keep this code simple, and bail out for NaCl. A further + // PeepholeOptimizer pass can do a similar optimization + // (see optimizeCompareInstr in X86InstrInfo.cpp), so it's not *so* + // bad. This function also converts "add op, -1" to DEC, which can + // help fold load/stores: + // (store m, (add (load m), -1)) -> (dec m) + // So we lose out on that. + // BUG=http://code.google.com/p/nativeclient/issues/detail?id=2711 + bool ConservativeForNaCl = Subtarget->isTargetNaCl(); + // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. - if (Op.getResNo() != 0 || NeedOF || NeedCF) + if (Op.getResNo() != 0 || NeedOF || NeedCF || ConservativeForNaCl) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); + // @LOCALMOD-END unsigned Opcode = 0; unsigned NumOperands = 0; @@ -8903,6 +9010,10 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, /// if it's possible. 
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const { + // @LOCALMOD: NaCl validator rejects BT, BTS, and BTC. + if (Subtarget->isTargetNaCl()) + return SDValue(); + SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -9734,14 +9845,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); // FIXME: Ensure alignment here - bool Is64Bit = Subtarget->is64Bit(); - EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; + bool Has64BitPointers = Subtarget->has64BitPointers(); // @LOCALMOD + EVT SPTy = Has64BitPointers ? MVT::i64 : MVT::i32; // @LOCALMOD if (getTargetMachine().Options.EnableSegmentedStacks) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (Is64Bit) { + if (Subtarget->is64Bit()) { // @LOCALMOD // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. const Function *F = MF.getFunction(); @@ -9754,7 +9865,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } const TargetRegisterClass *AddrRegClass = - getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32); + getRegClassFor(Has64BitPointers ? MVT::i64:MVT::i32); // @LOCALMOD unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, @@ -9763,7 +9874,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops1, 2, dl); } else { SDValue Flag; - unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); + unsigned Reg = (Has64BitPointers ? X86::RAX : X86::EAX); // @LOCALMOD Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); @@ -9772,7 +9883,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); Flag = Chain.getValue(1); - Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, // @LOCALMOD SPTy).getValue(1); SDValue Ops1[2] = { Chain.getValue(0), Chain }; @@ -9801,6 +9912,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // fp_offset (48 - 48 + 8 * 16) // overflow_arg_area (point to parameters coming in memory). // reg_save_area + unsigned PointerSize = TD->getPointerSize(0); // @LOCALMOD SmallVector<SDValue, 8> MemOps; SDValue FIN = Op.getOperand(1); // Store gp_offset @@ -9823,7 +9935,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + getPointerTy()); // @LOCALMOD Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8), false, false, 0); @@ -9831,11 +9943,12 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Store ptr to reg_save_area. 
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(8)); + FIN, DAG.getIntPtrConstant(PointerSize)); // @LOCALMOD SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy()); + getPointerTy()); // @LOCALMOD Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, - MachinePointerInfo(SV, 16), false, false, 0); + MachinePointerInfo(SV, 8+PointerSize), // @LOCALMOD + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], MemOps.size()); @@ -9845,7 +9958,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert((Subtarget->isTargetLinux() || - Subtarget->isTargetDarwin()) && + Subtarget->isTargetDarwin() || + Subtarget->isTargetNaCl()) && // @LOCALMOD "Unhandled target in LowerVAARG"); assert(Op.getNode()->getNumOperands() == 4); SDValue Chain = Op.getOperand(0); @@ -9920,11 +10034,56 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, DebugLoc DL = Op.getDebugLoc(); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, + // @LOCALMOD-START + // Size is actually 8 + 2 * pointer size and align + // is the pointer ABI alignment but we don't have a + // pointer to TD in this static function + DAG.getIntPtrConstant(Subtarget->has64BitPointers() ? + 24 : 16), + Subtarget->has64BitPointers() ? 8 : 4, + /*isVolatile*/false, + // @LOCALMOD-END false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } +////////////////////////////////////////////////////////////////////// +// NaCl TLS setup / layout intrinsics. +// See: native_client/src/untrusted/stubs/tls_params.h +SDValue X86TargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tls_offset (size_t tls_size) { + // return -tls_size; + // } + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + +SDValue X86TargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tdb_offset (size_t tdb_size) { + // return 0; + // } + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue +X86TargetLowering::LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const { + // int __nacl_target_arch () { + // return (is_64_bit ? + // PnaclTargetArchitectureX86_64 : + // PnaclTargetArchitectureX86_32); + // } + return DAG.getConstant((Subtarget->is64Bit() ? + PnaclTargetArchitectureX86_64 : + PnaclTargetArchitectureX86_32), + Op.getValueType().getSimpleVT()); +} + +////////////////////////////////////////////////////////////////////// + // getTargetVShiftNOde - Handle vector element shifts where the shift amount // may or may not be a constant. Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, @@ -9970,11 +10129,37 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. 
+ + // @LOCALMOD-BEGIN + case Intrinsic::nacl_read_tp: { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + if (Subtarget->is64Bit() || llvm::TLSUseCall) { + // Call __nacl_read_tp() to get the thread pointer. + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + SDValue ReadTpFunction = DAG.getExternalSymbol("__nacl_read_tp", PtrVT); + ArgListTy Args; + TargetLowering::CallLoweringInfo CLI( + DAG.getEntryNode(), PtrTy, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, + ReadTpFunction, Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + return CallResult.first; + } else { + // Get %gs:0, which contains the thread pointer on x86-32. + return DAG.getNode(X86ISD::THREAD_POINTER_FROM_GS, dl, PtrVT); + } + } + // @LOCALMOD-END + // Comparison intrinsics. case Intrinsic::x86_sse_comieq_ss: case Intrinsic::x86_sse_comilt_ss: @@ -10570,7 +10755,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; + unsigned FrameReg = Subtarget->has64BitPointers() ? X86::RBP : X86::EBP; // @LOCALMOD SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -10590,10 +10775,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); + // @LOCALMOD-START + bool Has64BitPointers = Subtarget->has64BitPointers(); SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Subtarget->is64Bit() ? X86::RBP : X86::EBP, + Has64BitPointers ? X86::RBP : X86::EBP, getPointerTy()); - unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); + unsigned StoreAddrReg = (Has64BitPointers ? 
X86::RCX : X86::ECX); + // @LOCALMOD-END SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, DAG.getIntPtrConstant(RegInfo->getSlotSize())); @@ -11674,6 +11862,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + case ISD::NACL_TARGET_ARCH: return LowerNaClTargetArch(Op, DAG); + // @LOCALMOD-END } } @@ -11957,6 +12150,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; + case X86ISD::TLSADDR_LE: return "X86ISD::TLSADDR_LE"; // @LOCALMOD + case X86ISD::TLSADDR_IE: return "X86ISD::TLSADDR_IE"; // @LOCALMOD case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_SJLJ_SETJMP: return "X86ISD::EH_SJLJ_SETJMP"; case X86ISD::EH_SJLJ_LONGJMP: return "X86ISD::EH_SJLJ_LONGJMP"; @@ -12967,9 +13162,11 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); // Machine Information + bool IsNaCl = Subtarget->isTargetNaCl(); // @LOCALMOD const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); + const TargetRegisterClass *AddrRegClass = + getRegClassFor(getPointerTy()); // @LOCALMOD const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); DebugLoc DL = MI->getDebugLoc(); @@ -12997,7 +13194,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( MachineBasicBlock *overflowMBB; MachineBasicBlock *offsetMBB; MachineBasicBlock *endMBB; - + unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB unsigned OffsetReg = 0; @@ -13078,29 +13275,39 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( } // In offsetMBB, emit code to use the reg_save_area. + unsigned Opc; // @LOCALMOD if (offsetMBB) { assert(OffsetReg != 0); // Read the reg_save_area address. unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) + Opc = IsNaCl ? X86::MOV32rm : X86::MOV64rm; // @LOCALMOD + BuildMI(offsetMBB, DL, TII->get(Opc), RegSaveReg) // @LOCALMOD .addOperand(Base) .addOperand(Scale) .addOperand(Index) - .addDisp(Disp, 16) + .addDisp(Disp, 8+TD->getPointerSize(0)) // @LOCALMOD .addOperand(Segment) .setMemRefs(MMOBegin, MMOEnd); // Zero-extend the offset - unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) - .addImm(0) - .addReg(OffsetReg) - .addImm(X86::sub_32bit); + // @LOCALMOD-BEGIN + unsigned OffsetRegExt; + if (IsNaCl) { + OffsetRegExt = OffsetReg; + } else { + OffsetRegExt = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetRegExt) + .addImm(0) + .addReg(OffsetReg) + .addImm(X86::sub_32bit); + } + // @LOCALMOD-END // Add the offset to the reg_save_area to get the final address. - BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) - .addReg(OffsetReg64) + Opc = IsNaCl ? 
X86::ADD32rr : X86::ADD64rr; // @LOCALMOD + BuildMI(offsetMBB, DL, TII->get(Opc), OffsetDestReg) + .addReg(OffsetRegExt) // @LOCALMOD .addReg(RegSaveReg); // Compute the offset for the next argument @@ -13130,7 +13337,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( // Load the overflow_area address into a register. unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) + Opc = IsNaCl ? X86::MOV32rm : X86::MOV64rm; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), OverflowAddrReg) .addOperand(Base) .addOperand(Scale) .addOperand(Index) @@ -13146,11 +13354,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) - BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) + Opc = IsNaCl ? X86::ADD32ri : X86::ADD64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), TmpReg) .addReg(OverflowAddrReg) .addImm(Align-1); - BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) + Opc = IsNaCl ? X86::AND32ri : X86::AND64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), OverflowDestReg) .addReg(TmpReg) .addImm(~(uint64_t)(Align-1)); } else { @@ -13161,12 +13371,14 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) + Opc = IsNaCl ? X86::ADD32ri : X86::ADD64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); // Store the new overflow address. - BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) + Opc = IsNaCl ? X86::MOV32mr : X86::MOV64mr; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc)) .addOperand(Base) .addOperand(Scale) .addOperand(Index) @@ -13541,6 +13753,25 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, return BB; } +// @LOCALMOD-BEGIN +MachineBasicBlock * +X86TargetLowering::EmitLoweredThreadPointerFromGs(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // This generates "movl %gs:0, %DEST", which fetches the thread + // pointer on x86-32. 
+ BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), MI->getOperand(0).getReg()) + .addReg(/*Base=*/0) + .addImm(/*Scale=*/1) + .addReg(/*IndexReg=*/0) + .addImm(/*Disp=*/0) + .addReg(/*Segment=*/X86::GS); + MI->eraseFromParent(); + return BB; +} +// @LOCALMOD-END + MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -13816,6 +14047,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitLoweredSegAlloca(MI, BB, false); case X86::SEG_ALLOCA_64: return EmitLoweredSegAlloca(MI, BB, true); + // @LOCALMOD-BEGIN + case X86::THREAD_POINTER_FROM_GS: + return EmitLoweredThreadPointerFromGs(MI, BB); + // @LOCALMOD-END case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); @@ -14012,6 +14247,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); case X86::VAARG_64: + case X86::NACL_CG_VAARG_64: return EmitVAARG64WithCustomInserter(MI, BB); case X86::EH_SjLj_SetJmp32: @@ -15698,6 +15934,12 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } unsigned Bits = VT.getSizeInBits(); + // @LOCALMOD-START + // Due to a limitation in NaCl's 32-bit validator, + // 16-bit shld instructions are illegal in 32-bit NaCl. + if (Subtarget->isTargetNaCl() && !Subtarget->is64Bit() && Bits == 16) + return SDValue(); + // @LOCALMOD-END if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { @@ -17801,4 +18043,3 @@ unsigned X86VectorTargetTransformInfo::getCastInstrCost(unsigned Opcode, return VectorTargetTransformImpl::getCastInstrCost(Opcode, Dst, Src); } - diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 465c6036ad..b6e8960f76 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -214,6 +214,16 @@ namespace llvm { // TLSBASEADDR - Thread Local Storage. A call to get the start address // of the TLS block for the current module. TLSBASEADDR, + // @LOCALMOD-BEGIN + // TLSADDR_LE - Thread Local Storage. (Local Exec Model) + TLSADDR_LE, + + // TLSADDR_IE - Thread Local Storage. (Initial Exec Model) + TLSADDR_IE, + + // THREAD_POINTER_FROM_GS - Read thread pointer from %gs:0 on x86-32. + THREAD_POINTER_FROM_GS, + // @LOCALMOD-END // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. @@ -465,6 +475,7 @@ namespace llvm { //===--------------------------------------------------------------------===// // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { + public: explicit X86TargetLowering(X86TargetMachine &TM); @@ -718,6 +729,9 @@ namespace llvm { const X86Subtarget *Subtarget; const X86RegisterInfo *RegInfo; const DataLayout *TD; + // @LOCALMOD - This is essentially a revert of r167104 + /// X86StackPtr - X86 physical register used as stack ptr. + unsigned X86StackPtr; /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 /// floating point ops. 
@@ -819,6 +833,7 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; @@ -831,11 +846,18 @@ namespace llvm { SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-BEGIN + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + + // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; SDValue lowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; @@ -903,6 +925,12 @@ namespace llvm { MachineBasicBlock *BB, bool Is64Bit) const; + // @LOCALMOD-BEGIN + MachineBasicBlock *EmitLoweredThreadPointerFromGs( + MachineInstr *MI, + MachineBasicBlock *BB) const; + // @LOCALMOD-END + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index f790611b8f..f580b76d95 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -32,8 +32,9 @@ def LEA64_32r : I<0x8D, MRMSrcMem, [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, Requires<[In64BitMode]>; +// @LOCALMOD (lea64mem) let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 9e6f27988f..a24ddf6f99 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -92,8 +92,8 @@ def VAARG_64 : I<0, Pseudo, "#VAARG_64 $dst, $ap, $size, $mode, $align", [(set GR64:$dst, (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), - (implicit EFLAGS)]>; - + (implicit EFLAGS)]>, + Requires<[IsNotNaCl]>; // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows // targets. These calls are needed to probe the stack when allocating more than // 4k bytes in one go. 
Touching the stack at 4K increments is necessary to @@ -399,7 +399,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, - Requires<[In32BitMode]>; + Requires<[In32BitMode, IsNotNaCl]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, @@ -425,6 +425,16 @@ def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), Requires<[In64BitMode]>; } +// @LOCALMOD-BEGIN +// NaCl TLS support +let usesCustomInserter = 1 in { + def THREAD_POINTER_FROM_GS : + I<0, Pseudo, (outs GR32:$dst), (ins), + "# get thread pointer from %gs:0", + [(set GR32:$dst, (X86thread_pointer_from_gs))]>; +} +// @LOCALMOD-END + // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function @@ -993,9 +1003,9 @@ def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsNotNaCl]>; def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; + (CALL64pcrel32 texternalsym:$dst)>, Requires<[IsNotNaCl]>; // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they // can never use callee-saved registers. That is the purpose of the GR64_TC @@ -1024,7 +1034,7 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode, IsNotPIC]>; + Requires<[In32BitMode, IsNotPIC, IsNotNaCl]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, @@ -1036,29 +1046,29 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // Normal calls, with various flavors of addresses. def : Pat<(X86call (i32 tglobaladdr:$dst)), - (CALLpcrel32 tglobaladdr:$dst)>; + (CALLpcrel32 tglobaladdr:$dst)>, Requires<[IsNotNaCl]>; def : Pat<(X86call (i32 texternalsym:$dst)), - (CALLpcrel32 texternalsym:$dst)>; + (CALLpcrel32 texternalsym:$dst)>, Requires<[IsNotNaCl]>; def : Pat<(X86call (i32 imm:$dst)), - (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; + (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr, IsNotNaCl]>; // Comparisons. 
@@ -1483,19 +1493,19 @@ def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi))>; + sub_8bit_hi))>, Requires<[IsNotNaCl]>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index bfe954114c..5dd04aad69 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -112,7 +112,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>; + [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode,IsNotNaCl]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>; @@ -126,7 +126,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { (ins i32imm:$off, i16imm:$seg), "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>; + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, Requires<[IsNotNaCl]>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize; @@ -155,10 +155,10 @@ let isCall = 1 in "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - Requires<[In32BitMode]>; + Requires<[In32BitMode,IsNotNaCl]>; // @LOCALMOD def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, - Requires<[In32BitMode]>; + Requires<[In32BitMode,IsNotNaCl]>; // @LOCALMOD def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), @@ -178,9 +178,20 @@ let isCall = 1 in let isAsmParserOnly = 1 in def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, (outs), (ins i16imm_pcrel:$dst), - "callw\t$dst", []>, OpSize; + "callw\t$dst", []>, OpSize, + Requires<[IsNotNaCl]>; // @LOCALMOD } +// @LOCALMOD-BEGIN +// These CodeGen patterns are normally part of the declaration above. +// However, we need to be able to disable these patterns for NaCl +// without disabling the the instruction itself. (so we can use the +// instruction in assembly input) +def : Pat<(X86call GR32:$dst), + (CALL32r GR32:$dst)>, Requires<[IsNotNaCl]>; +def : Pat<(X86call (loadi32 addr:$dst)), + (CALL32m addr:$dst)>, Requires<[IsNotNaCl]>; +// @LOCALMOD-END // Tail call stuff. @@ -205,7 +216,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. 
let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), - "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>, Requires<[IsNotNaCl]>; // @LOCALMOD } @@ -223,18 +234,18 @@ let isCall = 1, Uses = [RSP] in { def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // @LOCALMOD def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // @LOCALMOD def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // @LOCALMOD def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; + "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>, Requires<[IsNotNaCl]>; // @LOCALMOD } let isCall = 1, isCodeGenOnly = 1 in @@ -269,5 +280,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>, + Requires<[IsNotNaCl]>; // @LOCALMOD } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 268e9fc9c0..7309942880 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -53,6 +53,7 @@ def MRM_DC : Format<53>; def MRM_DD : Format<54>; def MRM_DE : Format<55>; def MRM_DF : Format<56>; +def CustomFrm : Format<62>; // @LOCALMOD // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 5a99ff004d..0267fdd860 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,12 +276,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } + // @LOCALMOD-BEGIN + unsigned NoForwardForNaCl = + tm.getSubtarget<X86Subtarget>().isTargetNaCl() ? 
TB_NO_FORWARD : 0; + // @LOCALMOD-END + static const X86OpTblEntry OpTbl0[] = { { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, - { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, - { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, @@ -308,8 +313,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, - { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, - { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, @@ -348,8 +353,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, - { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, - { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, @@ -2869,6 +2874,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to " << RI.getName(DestReg) << '\n'); + MBB.dump(); llvm_unreachable("Cannot emit physreg copy instruction"); } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 650fa95d7f..cec4625135 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -103,6 +103,10 @@ def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +// @LOCALMOD-BEGIN +def SDT_X86ThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +// @LOCALMOD-END + def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>; @@ -213,6 +217,17 @@ def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// @LOCALMOD-BEGIN +def X86tlsaddr_le : SDNode<"X86ISD::TLSADDR_LE", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def X86tlsaddr_ie : SDNode<"X86ISD::TLSADDR_IE", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def X86thread_pointer_from_gs : + SDNode<"X86ISD::THREAD_POINTER_FROM_GS", SDT_X86ThreadPointer>; +// @LOCALMOD-END + def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; @@ -518,6 +533,13 @@ def i64i8imm : Operand<i64> { let OperandType = "OPERAND_IMMEDIATE"; } +// @LOCALMOD +def lea64mem : Operand<i64> { + let PrintMethod = "printi64mem"; + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; 
+} + def lea64_32mem : Operand<i32> { let PrintMethod = "printi32mem"; let AsmOperandLowerMethod = "lower_lea64_32mem"; @@ -533,7 +555,8 @@ def lea64_32mem : Operand<i32> { // Define X86 specific addressing mode. def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>; def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr", - [add, sub, mul, X86mul_imm, shl, or, frameindex], + [add, sub, mul, X86mul_imm, shl, or, frameindex, + X86WrapperRIP], // @LOCALMOD []>; def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; @@ -599,7 +622,7 @@ def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; -def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&" @@ -1682,6 +1705,12 @@ let Predicates = [HasBMI2] in { //===----------------------------------------------------------------------===// include "X86InstrArithmetic.td" + +//===----------------------------------------------------------------------===// +// NaCl support (@LOCALMOD) +//===----------------------------------------------------------------------===// + +include "X86InstrNaCl.td" include "X86InstrCMovSetCC.td" include "X86InstrExtension.td" include "X86InstrControl.td" diff --git a/lib/Target/X86/X86InstrNaCl.td b/lib/Target/X86/X86InstrNaCl.td new file mode 100644 index 0000000000..8a7eebecd7 --- /dev/null +++ b/lib/Target/X86/X86InstrNaCl.td @@ -0,0 +1,357 @@ +//====- X86InstrNaCl.td - Describe NaCl Instructions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the modifications to the X86 instruction set needed for +// Native Client code generation. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// NaCl specific DAG Nodes. +// + +//===----------------------------------------------------------------------===// +// +// Native Client Pseudo-Instructions +// +// These instructions implement the Native Client pseudo-instructions, such +// as nacljmp and naclasp. +// +// TableGen and MC consider these to be "real" instructions. They can be +// parsed by the AsmParser and emitted by the AsmStreamer as if they +// were just regular instructions. They are not marked "Pseudo" because +// this would imply isCodeGenOnly=1, which would stop them from being +// parsed by the assembler. +// +// These instructions cannot be encoded (written into an object file) by the +// MCCodeEmitter. Instead, during direct object emission, they get lowered to +// a sequence of streamer emits. (see X86InstrNaCl.cpp) +// +// These instructions should not be used in CodeGen. They have no pattern +// and lack CodeGen metadata. Instead, the X86NaClRewritePass should +// generate these instructions after CodeGen is finished. 
+// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// 32-bit Native Client Pseudo Instructions +//===----------------------------------------------------------------------===// + +class NaClPI32<dag outs, dag ins, string asm> + : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In32BitMode]>; + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in { + def NACL_TRAP32 : NaClPI32<(outs), (ins), "nacltrap">; +} + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in { + def NACL_RET32 : NaClPI32<(outs), (ins), "naclret">; + def NACL_RETI32 : NaClPI32<(outs), (ins i16imm:$amt), "naclreti\t$amt">; +} + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, + isAsmParserOnly = 1 in { + def NACL_JMP32r : NaClPI32<(outs), (ins GR32:$dst), "nacljmp\t$dst">; +} + +let isCall = 1, isAsmParserOnly = 1 in { + def NACL_CALL32d : NaClPI32<(outs), (ins i32imm_pcrel:$dst), + "naclcall\t$dst">; + def NACL_CALL32r : NaClPI32<(outs), (ins GR32:$dst), + "naclcall\t$dst">; +} + +// nacltlsaddr32 gets rewritten to: +// .bundle_align_end +// .bundle_lock +// leal\t$sym@TLSGD, %eax +// call\t___tls_get_addr@PLT +// .bundle_unlock +// (The linker expects the leal+call sequence to be directly adjacent) +let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP], + isAsmParserOnly = 1 in +def NACL_TLS_addr32 : NaClPI32<(outs), (ins i32mem:$sym), + "nacltlsaddr32\t$sym">; + +//===----------------------------------------------------------------------===// +// 64-bit Native Client Pseudo Instructions +//===----------------------------------------------------------------------===// + +class NaClPI64<dag outs, dag ins, string asm> + : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In64BitMode]>; + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in { + def NACL_TRAP64 : NaClPI64<(outs), (ins), "nacltrap">; +} + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in { + def NACL_RET64 : NaClPI64<(outs), (ins), "naclret">; +} + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, + isAsmParserOnly = 1 in { + def NACL_JMP64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP), + "nacljmp\t{$dst, $rZP|$rZP, $dst}">; + def NACL_JMP64z : NaClPI64<(outs), (ins GR32:$dst), + "nacljmp\t$dst">; +} + + +let isCall = 1, isAsmParserOnly = 1 in { + def NACL_CALL64d : NaClPI64<(outs), (ins i32imm_pcrel:$dst), + "naclcall\t$dst">; + def NACL_CALL64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP), + "naclcall\t$dst,$rZP">; +} + +let Defs = [RSP, EFLAGS], Uses = [RSP], isAsmParserOnly = 1 in { + def NACL_ASPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP), + "naclasp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_ASPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclasp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_SSPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP), + "naclssp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_SSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclssp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_ANDSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclandsp{q}\t{$off, 
$rZP|$rZP, $off}">; +} + +let Defs = [RSP], Uses = [RBP], isAsmParserOnly = 1 in { + def NACL_SPADJi32 : NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclspadj\t{$off, $rZP|$rZP, $off}">; +} + +let Defs = [RSP], isAsmParserOnly = 1 in { + def NACL_RESTSPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP), + "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTSPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP), + "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTSPrz : NaClPI64<(outs), (ins GR32:$src), + "naclrestsp_noflags\t$src">; +} + +def : MnemonicAlias<"naclrestsp", "naclrestsp_noflags">; + +let Defs = [RBP], isAsmParserOnly = 1 in { + def NACL_RESTBPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP), + "naclrestbp\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTBPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP), + "naclrestbp\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTBPrz : NaClPI64<(outs), (ins GR32:$src), + "naclrestbp\t$src">; +} + +//===----------------------------------------------------------------------===// +// +// Code Generator Instructions (isCodeGenOnly == 1) +// +// These instructions exists to make CodeGen work with Native Client's +// modifications. +// +// Many of these instructions exist because of limitations in CodeGen +// or TableGen, and may become unnecessary in the future. +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// +// CodeGen 32-bit +// +//===----------------------------------------------------------------------===// + + +// To avoid a naming conflict between call/naclcall, we have to +// disable the real CALLpcrel32 and CALL32r instructions when targeting +// for NaCl. Thus, they need to be produced here. + +let isCall = 1 in + // All calls clobber the non-callee saved registers. ESP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in { + + def NACL_CG_CALLpcrel32 : I<0, Pseudo, + (outs), (ins i32imm_pcrel:$dst), + "naclcall\t$dst", []>, + Requires<[IsNaCl, In32BitMode]>; + def NACL_CG_CALL32r : I<0, Pseudo, + (outs), (ins GR32:$dst), + "naclcall\t$dst", [(X86call GR32:$dst)]>, + Requires<[IsNaCl, In32BitMode]>; +} + +// Normal calls, with various flavors of addresses. +def : Pat<(X86call (i32 tglobaladdr:$dst)), + (NACL_CG_CALLpcrel32 tglobaladdr:$dst)>, + Requires<[IsNaCl, In32BitMode]>; +def : Pat<(X86call (i32 texternalsym:$dst)), + (NACL_CG_CALLpcrel32 texternalsym:$dst)>, + Requires<[IsNaCl, In32BitMode]>; +def : Pat<(X86call (i32 imm:$dst)), + (NACL_CG_CALLpcrel32 imm:$dst)>, + Requires<[IsNaCl, In32BitMode, CallImmAddr]>; + +//===----------------------------------------------------------------------===// +// +// CodeGen 64-bit +// +//===----------------------------------------------------------------------===// + + +// Because pointers are 32-bit on X86-64 Native Client, we need to +// produce new versions of the JMP64/CALL64 instructions which can accept +// addresses which are i32 instead of i64. 
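
Concretely, the expanded form only ever adds a zero-extended, bundle-masked 32-bit value to the %r15 sandbox base (or to zero under the zero-based sandbox model controlled by FlagUseZeroBasedSandbox later in this patch), so an i32 operand carries all the information needed. A minimal sketch of that effective-target computation, with the 0xffffffe0 mask (32-byte bundles) stated as an assumption and the helper name purely illustrative:

#include <cstdint>

// Sketch only: the effective target of an expanded NaCl x86-64 indirect
// jump or call.  The pseudo takes a 32-bit register; the expansion masks it
// to a bundle boundary (the 32-bit AND also zero-extends it) and rebases it
// off %r15, so a 64-bit operand is never needed.
static inline uint64_t SandboxedTarget(uint64_t R15Base, uint32_t Reg32,
                                       bool UseZeroBasedSandbox) {
  uint32_t Masked = Reg32 & 0xffffffe0u;              // andl $-32, %reg32
  uint64_t Base = UseZeroBasedSandbox ? 0 : R15Base;  // addq %r15, %reg64
  return Base + Masked;                                // jmpq/callq *%reg64
}
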
+ +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def NACL_CG_JMP64r : I<0, Pseudo, (outs), (ins GR32:$dst), + "nacljmp\t$dst", + [(brind GR32:$dst)]>, + Requires<[IsNaCl, In64BitMode]>; +} + +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { + + def NACL_CG_CALL64pcrel32 : I<0, Pseudo, (outs), + (ins i32imm_pcrel:$dst), + "naclcall\t$dst", []>, + Requires<[IsNaCl, In64BitMode]>; + + def NACL_CG_CALL64r : I<0, Pseudo, (outs), (ins GR32:$dst), + "naclcall\t$dst,%r15", + [(X86call GR32:$dst)]>, + Requires<[IsNaCl, In64BitMode]>; +} + +def : Pat<(X86call (i32 tglobaladdr:$dst)), + (NACL_CG_CALL64pcrel32 tglobaladdr:$dst)>, + Requires<[IsNaCl, In64BitMode]>; +def : Pat<(X86call (i32 texternalsym:$dst)), + (NACL_CG_CALL64pcrel32 texternalsym:$dst)>, + Requires<[IsNaCl, In64BitMode]>; + +// Tail calls +// Also needed due to the i64 / i32 pointer problem. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { + + def NACL_CG_TCRETURNdi64 : I<0, Pseudo, (outs), + (ins i32imm_pcrel:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", []>, + Requires<[IsNaCl, In64BitMode]>; + def NACL_CG_TCRETURNri64 : I<0, Pseudo, (outs), + (ins GR32_TC_64:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", []>, + Requires<[IsNaCl, In64BitMode]>; + + def NACL_CG_TAILJMPd64 : I<0, Pseudo, (outs), + (ins i32imm_pcrel:$dst), + "jmp\t$dst # TAILCALL", []>, + Requires<[IsNaCl, In64BitMode]>; + def NACL_CG_TAILJMPr64 : I<0, Pseudo, (outs), + (ins GR32_TC_64:$dst), + "nacljmp\t$dst,%r15 # TAILCALL", []>, + Requires<[IsNaCl, In64BitMode]>; +} + +def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), + (NACL_CG_TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, + Requires<[IsNaCl, In64BitMode]>; + +def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), + (NACL_CG_TCRETURNdi64 texternalsym:$dst, imm:$off)>, + Requires<[IsNaCl, In64BitMode]>; + +def : Pat<(X86tcret GR32_TC_64:$dst, imm:$off), + (NACL_CG_TCRETURNri64 GR32_TC_64:$dst, imm:$off)>, + Requires<[IsNaCl, In64BitMode]>; + +// ELF TLS Support + +let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in +def NACL_CG_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + ".bundle_align_end" + ".bundle_lock" + "leal\t$sym, %eax; " + "call\t___tls_get_addr@PLT" + ".bundle_unlock", + [(X86tlsaddr tls32addr:$sym)]>, + Requires<[In32BitMode, IsNaCl]>; + +// These are lowered in X86NaClRewritePass. 
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { +def NACL_CG_GD_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr tls32addr:$sym)]>, + Requires<[IsNaCl, In64BitMode]>; +def NACL_CG_LE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_le tls32addr:$sym)]>, + Requires<[IsNaCl, In64BitMode]>; +def NACL_CG_IE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_ie tls32addr:$sym)]>, + Requires<[IsNaCl, In64BitMode]>; +// For mtls-use-call. +def NACL_CG_LE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_le tls32addr:$sym)]>, + Requires<[IsNaCl, In32BitMode]>; +def NACL_CG_IE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_ie tls32addr:$sym)]>, + Requires<[IsNaCl, In32BitMode]>; +} + +let usesCustomInserter = 1, Defs = [EFLAGS] in +def NACL_CG_VAARG_64 : I<0, Pseudo, + (outs GR32:$dst), + (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), + "#NACL_VAARG_64 $dst, $ap, $size, $mode, $align", + [(set GR32:$dst, + (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), + (implicit EFLAGS)]>, + Requires<[IsNaCl, In64BitMode]>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 764aa5d4f2..4b528f6153 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -18,6 +18,8 @@ #include "X86TargetMachine.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h"//TODO(dschuff):don't forget to remove these +#include "llvm/Support/Disassembler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Valgrind.h" #include <cstdlib> @@ -82,7 +84,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // Provide a wrapper for X86CompilationCallback2 that saves non-traditional // callee saved registers, for the fastcc calling convention. extern "C" { -#if defined(X86_64_JIT) +#if defined(X86_64_JIT) && !defined(__native_client__) # ifndef _MSC_VER // No need to save EAX/EDX for X86-64. void X86CompilationCallback(void); @@ -230,7 +232,11 @@ extern "C" { "popl %ebp\n" CFI(".cfi_adjust_cfa_offset -4\n") CFI(".cfi_restore %ebp\n") +#if defined(__native_client__) // @LOCALMOD-BEGIN + "popl %ecx; nacljmp %ecx\n" +#else "ret\n" +#endif // @LOCALMOD-END CFI(".cfi_endproc\n") SIZE(X86CompilationCallback) ); @@ -295,7 +301,11 @@ extern "C" { "popl %ebp\n" CFI(".cfi_adjust_cfa_offset -4\n") CFI(".cfi_restore %ebp\n") +#if defined(__native_client__) // @LOCALMOD-BEGIN + "popl %ecx; nacljmp %ecx\n" +#else "ret\n" +#endif // @LOCALMOD-END CFI(".cfi_endproc\n") SIZE(X86CompilationCallback_SSE) ); @@ -469,7 +479,14 @@ TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { // The 32-bit stub contains a 5-byte call|jmp. // If the stub is a call to the compilation callback, an extra byte is added // to mark it as a stub. +#ifdef __native_client__ + // NaCl call targets must be bundle-aligned. 
In the case of stubs with + // CALLs, the calls do not need to be aligned to the end of the bundle + // because there is no return + StubLayout Result = {32, 32};//TODO(dschuff): use named constant here +#else StubLayout Result = {14, 4}; +#endif return Result; } @@ -498,6 +515,9 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, JCE.emitByte(0xE9); JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); #endif + DEBUG(dbgs() <<"emitted stub: "<< sys::disassembleBuffer( + (uint8_t *)Result,JCE.getCurrentPCValue()-(uintptr_t)Result, + (intptr_t)Result)); return Result; } @@ -519,6 +539,9 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, // initialize the buffer with garbage, which means it may follow a // noreturn function call, confusing X86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! + DEBUG(dbgs() <<"emitted stub: "<< sys::disassembleBuffer( + (uint8_t *)Result,JCE.getCurrentPCValue()-(uintptr_t)Result, + (intptr_t)Result)); return Result; } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index cfd68f74b7..20bc85e65f 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -703,7 +703,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Emit the call. MCSymbol *PICBase = MF->getPICBaseSymbol(); - TmpInst.setOpcode(X86::CALLpcrel32); + // @LOCALMOD-BEGIN + // For NaCl, the call should be aligned to the end of a bundle. Since the + // call is at the end of the bundle, there should be no padding between + // the call and the next instruction (the label should still make sense). + TmpInst.setOpcode(getSubtarget().isTargetNaCl() ? + X86::NACL_CALL32d : X86::CALLpcrel32); + // @LOCALMOD-END // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, diff --git a/lib/Target/X86/X86NaClJITInfo.cpp b/lib/Target/X86/X86NaClJITInfo.cpp new file mode 100644 index 0000000000..e5ccbf960d --- /dev/null +++ b/lib/Target/X86/X86NaClJITInfo.cpp @@ -0,0 +1,393 @@ +//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the JIT interfaces for the X86 target on Native Client +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "X86NaClJITInfo.h" +#include "X86Relocations.h" +#include "X86Subtarget.h" +#include "X86TargetMachine.h" +#include <cstdlib> +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Disassembler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Valgrind.h" +#ifdef __native_client__ +#include <nacl/nacl_dyncode.h> +#endif + +using namespace llvm; + +extern cl::opt<int> FlagSfiX86JmpMask; + +// Determine the platform we're running on +#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64) +# define X86_64_JIT +#elif defined(__i386__) || defined(i386) || defined(_M_IX86) +# define X86_32_JIT +#elif defined(__pnacl__) +#warning "PNaCl does not yet have JIT support" +#else +#error "Should not be building X86NaClJITInfo on non-x86" +// TODO(dschuff): make this work under pnacl self-build? +#endif + +// Get the ASMPREFIX for the current host. This is often '_'. +#ifndef __USER_LABEL_PREFIX__ +#define __USER_LABEL_PREFIX__ +#endif +#define GETASMPREFIX2(X) #X +#define GETASMPREFIX(X) GETASMPREFIX2(X) +#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) + +# define SIZE(sym) ".size " #sym ", . - " #sym "\n" +# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n" + +void X86NaClJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { + // We don't know the original instruction boundaries, so we replace the + // whole bundle. + uint8_t buf[kBundleSize]; + buf[0] = 0xE9; // Emit JMP opcode. + intptr_t OldAddr = ((uintptr_t)Old + 1); + uint32_t NewOffset = (intptr_t)New - OldAddr - 4;// PC-relative offset of new + *((uint32_t*)(buf + 1)) = NewOffset; + memcpy(buf + 5, getNopSequence(kBundleSize - 5), kBundleSize - 5); + +#ifdef __native_client__ + if(nacl_dyncode_create(Old, buf, kBundleSize)) { + report_fatal_error("machine code replacement failed"); + } +#endif + + // X86 doesn't need to invalidate the processor cache, so just invalidate + // Valgrind's cache directly. + sys::ValgrindDiscardTranslations(Old, 5); +} + +/// JITCompilerFunction - This contains the address of the JIT function used to +/// compile a function lazily. +static TargetJITInfo::JITCompilerFn JITCompilerFunction; + +extern "C" { +#if defined(X86_64_JIT) || defined(__pnacl__) || !defined(__native_client__) +void X86NaClCompilationCallback(void) { +//TODO(dschuff): implement for X86-64 +} +void X86NaClCompilationCallback_fastcc(void) { +//TODO(dschuff): implement for X86-64 +} +#else +// Chrome system requirements include PIII, So SSE is present. +// For now this is the same as X86CompilationCallback_SSE +// In the future we could emit this rather than defining it with asm, for +// compatibility with pnacl self-build +// Also omit CFI junk (which is #defined away) + +// The difference between the 2 wrapper variants is that the first returns +// through ecx and the 2nd returns through eax. The fastcc calling convention +// uses ecx to pass arguments, and the C calling convention uses eax to pass +// arguments with the 'inreg' attribute, so we make sure not to clobber it. +// Returning through eax for fastcc and ecx for C clobbers the 'nest' parameter +// breaking nested functions (which are not supported by clang in any case). 
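
The wrapper actually used for a given function is chosen when its lazy-compilation stub is emitted; a sketch of that dispatch follows (the real logic lives in emitFunctionStub further down; PickCompilationCallback is a hypothetical helper, not part of the patch):

#include "llvm/Function.h"  // include path as used elsewhere in this patch

using namespace llvm;

// Sketch of the dispatch performed by emitFunctionStub below: fastcc
// functions get the wrapper that returns through %eax, leaving %ecx (the
// fastcc argument register) untouched; all other functions get the default
// wrapper, which returns through %ecx and so preserves %eax (used for
// 'inreg' arguments under the C calling convention).
static void *PickCompilationCallback(const Function *F, void *DefaultCallback,
                                     void *FastccCallback) {
  return F->getCallingConv() == CallingConv::Fast ? FastccCallback
                                                  : DefaultCallback;
}
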
+ +void X86NaClCompilationCallback(void); +asm( + ".text\n" + ".align 32\n" + ".globl " ASMPREFIX "X86NaClCompilationCallback\n" + TYPE_FUNCTION(X86NaClCompilationCallback) + ASMPREFIX "X86NaClCompilationCallback:\n" + "pushl %ebp\n" + "movl %esp, %ebp\n" // Standard prologue + "pushl %eax\n" + "pushl %edx\n" // Save EAX/EDX/ECX + "pushl %ecx\n" + "andl $-16, %esp\n" // Align ESP on 16-byte boundary + // Save all XMM arg registers + "subl $64, %esp\n" + // FIXME: provide frame move information for xmm registers. + // This can be tricky, because CFA register is ebp (unaligned) + // and we need to produce offsets relative to it. + "movaps %xmm0, (%esp)\n" + "movaps %xmm1, 16(%esp)\n" + "movaps %xmm2, 32(%esp)\n" + "movaps %xmm3, 48(%esp)\n" + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "X86NaClCompilationCallback2\n" + "addl $16, %esp\n" + "movaps 48(%esp), %xmm3\n" + "movaps 32(%esp), %xmm2\n" + "movaps 16(%esp), %xmm1\n" + "movaps (%esp), %xmm0\n" + "movl %ebp, %esp\n" // Restore ESP + "subl $12, %esp\n" + "popl %ecx\n" + "popl %edx\n" + "popl %eax\n" + "popl %ebp\n" + "popl %ecx\n" + "nacljmp %ecx\n" + SIZE(X86NaClCompilationCallback) +); + + + +void X86NaClCompilationCallback_fastcc(void); +asm( + ".text\n" + ".align 32\n" + ".globl " ASMPREFIX "X86NaClCompilationCallback_fastcc\n" + TYPE_FUNCTION(X86NaClCompilationCallback_fastcc) + ASMPREFIX "X86NaClCompilationCallback_fastcc:\n" + "pushl %ebp\n" + "movl %esp, %ebp\n" // Standard prologue + "pushl %eax\n" + "pushl %edx\n" // Save EAX/EDX/ECX + "pushl %ecx\n" + "andl $-16, %esp\n" // Align ESP on 16-byte boundary + // Save all XMM arg registers + "subl $64, %esp\n" + // FIXME: provide frame move information for xmm registers. + // This can be tricky, because CFA register is ebp (unaligned) + // and we need to produce offsets relative to it. + "movaps %xmm0, (%esp)\n" + "movaps %xmm1, 16(%esp)\n" + "movaps %xmm2, 32(%esp)\n" + "movaps %xmm3, 48(%esp)\n" + "subl $16, %esp\n" + "movl 4(%ebp), %eax\n" // Pass prev frame and return address + "movl %eax, 4(%esp)\n" + "movl %ebp, (%esp)\n" + "call " ASMPREFIX "X86NaClCompilationCallback2\n" + "addl $16, %esp\n" + "movaps 48(%esp), %xmm3\n" + "movaps 32(%esp), %xmm2\n" + "movaps 16(%esp), %xmm1\n" + "movaps (%esp), %xmm0\n" + "movl %ebp, %esp\n" // Restore ESP + "subl $12, %esp\n" + "popl %ecx\n" + "popl %edx\n" + "popl %eax\n" + "popl %ebp\n" + "popl %eax\n" + "nacljmp %eax\n" + SIZE(X86NaClCompilationCallback_fastcc) +); +#endif + +/// X86CompilationCallback2 - This is the target-specific function invoked by the +/// function stub when we did not know the real target of a call. This function +/// must locate the start of the stub or call site and pass it into the JIT +/// compiler function. + +// A stub has the following format: +// | Jump opcode (1 byte) | Jump target +22 bytes | 3 bytes of NOPs +// | 18 bytes of NOPs | 1 halt | Call opcode (1 byte) | call target +// The jump targets the call at the end of the bundle, which targets the +// compilation callback. Once the compilation callback JITed the target +// function it replaces the first 8 bytes of the stub in a single atomic +// operation, retargeting the jump at the JITed function. 
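
A self-contained sketch of that stub layout and of the 8-byte retargeting, mirroring emitFunctionStub and X86NaClCompilationCallback2 below (illustrative only, not part of the patch; the two helper names are hypothetical):

#include <cstdint>
#include <cstring>

// Lay out the 32-byte lazy stub exactly as described above.
// Offsets: 0   E9 <+22>   jmp to the call at offset 27
//          5   90 x3      nops (keeps an instruction boundary at byte 8)
//          8   90 x18     nops
//          26  F4         hlt marker ("came from a stub")
//          27  E8 <rel32>  call to the compilation callback
static void BuildLazyStub(uint8_t Stub[32], uintptr_t StubAddr,
                          uintptr_t CallbackAddr) {
  Stub[0] = 0xE9;
  int32_t JmpDisp = 22;                        // lands on the call at +27
  std::memcpy(Stub + 1, &JmpDisp, 4);
  std::memset(Stub + 5, 0x90, 21);             // 3 + 18 nops
  Stub[26] = 0xF4;                             // hlt
  Stub[27] = 0xE8;
  int32_t CallDisp = (int32_t)(CallbackAddr - (StubAddr + 32));
  std::memcpy(Stub + 28, &CallDisp, 4);
}

// After the callee has been JITed, only the first 8 bytes are rewritten
// (atomically, via nacl_dyncode_modify) so the jump now targets the
// compiled function; bytes 5..7 remain nops.
static void RetargetLazyStub(uint8_t First8[8], uintptr_t StubAddr,
                             uintptr_t JITedAddr) {
  int32_t NewDisp = (int32_t)(JITedAddr - (StubAddr + 5));
  std::memcpy(First8 + 1, &NewDisp, 4);
}
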
+ +static uint8_t *BundleRewriteBuffer; + +static void LLVM_ATTRIBUTE_USED +X86NaClCompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { + // Get the return address from where the call instruction left it + intptr_t *RetAddrLoc = &StackPtr[1]; + assert(*RetAddrLoc == RetAddr && + "Could not find return address on the stack!"); + + // TODO: take a lock here. figure out whether it has to be the JIT lock or + // can be our own lock (or however we handle thread safety) +#if 0 + DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr + << " ESP=" << (void*)StackPtr << "\n"); +#endif + + intptr_t StubStart = RetAddr - 32; + // This probably isn't necessary. I believe the corresponding code in + // X86JITInfo is vestigial, and AFAICT no non-stub calls to the compilation + // callback are generated anywhere. Still it doesn't hurt as a sanity check + bool isStub = *((unsigned char*)StubStart) == 0xE9 && + *((int32_t*)(StubStart + 1)) == 22 && + *((unsigned char*)(StubStart + 26)) == 0xF4; + + assert(isStub && "NaCl doesn't support rewriting non-stub callsites yet"); + + // Backtrack so RetAddr points inside the stub (so JITResolver can find + // which function to compile) + RetAddr -= 4; + + intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr); + + // Rewrite the stub's call target, so that we don't end up here every time we + // execute the call. + + // Get the first 8 bytes of the stub + memcpy(BundleRewriteBuffer, (void *)(StubStart), 8); + // Point the jump at the newly-JITed code + *((intptr_t *)(BundleRewriteBuffer + 1)) = NewVal - (StubStart + 5); + + // Copy the new code +#ifdef __native_client__ + if(nacl_dyncode_modify((void *)StubStart, BundleRewriteBuffer, 8)) { + report_fatal_error("dyncode_modify failed"); + } +#endif + // TODO: release the lock + + // Change our return address to execute the new jump + *RetAddrLoc = StubStart; +} + +} + +const int X86NaClJITInfo::kBundleSize; + +TargetJITInfo::LazyResolverFn +X86NaClJITInfo::getLazyResolverFunction(JITCompilerFn F) { + JITCompilerFunction = F; + return X86NaClCompilationCallback; +} + +X86NaClJITInfo::X86NaClJITInfo(X86TargetMachine &tm) : X86JITInfo(tm) { + // FIXME: does LLVM have some way of doing static initialization? 
+#ifndef __pnacl__ + if(posix_memalign((void **)&BundleRewriteBuffer, kBundleSize, kBundleSize)) + report_fatal_error("Could not allocate aligned memory"); +#else + BundleRewriteBuffer = NULL; +#endif + + NopString = new uint8_t[kBundleSize]; + for (int i = 0; i < kBundleSize; i++) NopString[i] = 0x90; + X86Hlt.ins = new uint8_t[1]; + X86Hlt.ins[0] = 0xf4; + X86Hlt.len = 1; +} + +X86NaClJITInfo::~X86NaClJITInfo() { + delete [] NopString; + delete [] X86Hlt.ins; +} + +TargetJITInfo::StubLayout X86NaClJITInfo::getStubLayout() { + // NaCl stubs must be full bundles because calls still have to be aligned + // even if they don't return + StubLayout Result = {kBundleSize, kBundleSize}; + return Result; +} + + +void *X86NaClJITInfo::emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) { + bool TargetsCC = Target == (void *)(intptr_t)X86NaClCompilationCallback; + + // If we target the compilation callback, swap it for a different one for + // functions using the fastcc calling convention + if(TargetsCC && F->getCallingConv() == CallingConv::Fast) { + Target = (void *)(intptr_t)X86NaClCompilationCallback_fastcc; + } + + void *Result = (void *)JCE.getCurrentPCValue(); + assert(RoundUpToAlignment((uintptr_t)Result, kBundleSize) == (uintptr_t)Result + && "Unaligned function stub"); + if (!TargetsCC) { + // Jump to the target + JCE.emitByte(0xE9); + JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4); + // Fill with Nops. + emitNopPadding(JCE, 27); + } else { + // Jump over 22 bytes + JCE.emitByte(0xE9); + JCE.emitWordLE(22); + // emit 3-bytes of nop to ensure an instruction boundary at 8 bytes + emitNopPadding(JCE, 3); + // emit 18 bytes of nop + emitNopPadding(JCE, 18); + // emit 1 byte of halt. This helps CompilationCallback tell whether + // we came from a stub or not + JCE.emitByte(X86Hlt.ins[0]); + // emit a call to the compilation callback + JCE.emitByte(0xE8); + JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4); + } + return Result; +} + +// Relocations are the same as in X86, but the address being written +// not the same as the address that the offset is relative to (see comment on +// setRelocationBuffer in X86NaClJITInfo.h +void X86NaClJITInfo::relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { + void *RelocPos = RelocationBuffer + MR->getMachineCodeOffset(); + void *RelocTargetPos = (char*)Function + MR->getMachineCodeOffset(); + intptr_t ResultPtr = (intptr_t)MR->getResultPointer(); + switch ((X86::RelocationType)MR->getRelocationType()) { + case X86::reloc_pcrel_word: { + // PC relative relocation, add the relocated value to the value already in + // memory, after we adjust it for where the PC is. + ResultPtr = ResultPtr -(intptr_t)RelocTargetPos - 4 - MR->getConstantVal(); + *((unsigned*)RelocPos) += (unsigned)ResultPtr; + break; + } + case X86::reloc_picrel_word: { + // PIC base relative relocation, add the relocated value to the value + // already in memory, after we adjust it for where the PIC base is. + ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal()); + *((unsigned*)RelocPos) += (unsigned)ResultPtr; + break; + } + case X86::reloc_absolute_word: + case X86::reloc_absolute_word_sext: + // Absolute relocation, just add the relocated value to the value already + // in memory. 
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr; + break; + case X86::reloc_absolute_dword: + *((intptr_t*)RelocPos) += ResultPtr; + break; + } + } +} + +const uint8_t *X86NaClJITInfo::getNopSequence(size_t len) const { + // TODO(dschuff): use more efficient NOPs. + // Update emitNopPadding when it happens + assert((int)len <= kBundleSize && + "Nop sequence can't be more than bundle size"); + return NopString; +} + +void X86NaClJITInfo::emitNopPadding(JITCodeEmitter &JCE, size_t len) { + for (size_t i = 0; i < len; i++) JCE.emitByte(NopString[i]); +} + +const TargetJITInfo::HaltInstruction *X86NaClJITInfo::getHalt() const { + return &X86Hlt; +} + +int X86NaClJITInfo::getBundleSize() const { + return kBundleSize; +} + +int32_t X86NaClJITInfo::getJumpMask() const { + return FlagSfiX86JmpMask; +} diff --git a/lib/Target/X86/X86NaClJITInfo.h b/lib/Target/X86/X86NaClJITInfo.h new file mode 100644 index 0000000000..9416efeff1 --- /dev/null +++ b/lib/Target/X86/X86NaClJITInfo.h @@ -0,0 +1,75 @@ +//=- X86NaClJITInfo.h - X86 implementation of the JIT interface --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the X86 implementation of the TargetJITInfo class for +// Native Client +// +//===----------------------------------------------------------------------===// + +#ifndef X86NACLJITINFO_H +#define X86NACLJITINFO_H + +#include "X86JITInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/JITCodeEmitter.h" +#include "llvm/Target/TargetJITInfo.h" + +namespace llvm { + class X86NaClJITInfo : public X86JITInfo { + void emitNopPadding(JITCodeEmitter &JCE, size_t len); + const X86Subtarget *Subtarget; + uintptr_t PICBase; + uint8_t *NopString; + HaltInstruction X86Hlt; + uint8_t *RelocationBuffer; + public: + static const int kBundleSize = 32; + explicit X86NaClJITInfo(X86TargetMachine &tm); + virtual ~X86NaClJITInfo(); + + virtual void replaceMachineCodeForFunction(void *Old, void *New); + + // getStubLayout - Returns the size and alignment of the largest call stub + // on X86 NaCl. + virtual StubLayout getStubLayout(); + + // Note: the emission and functions MUST NOT touch the target memory + virtual void *emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE); + /// getLazyResolverFunction - Expose the lazy resolver to the JIT. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase); + + virtual char* allocateThreadLocalMemory(size_t size) { + //TODO(dschuff) Implement TLS or decide whether X86 TLS works + assert(0 && "This target does not implement thread local storage!"); + return 0; + } + /// Return a string containing a sequence of NOPs which is valid for + /// the given length + virtual const uint8_t *getNopSequence(size_t len) const; + virtual const HaltInstruction *getHalt() const; + virtual int getBundleSize() const; + virtual int getJumpMask() const; + /// Relocations cannot happen in-place in NaCl because we can't write to + /// code. 
This function takes a pointer to where the code has been emitted, + /// before it is copied to the code region. The subsequent call to + /// relocate takes pointers to the target code location, but rewrites the + /// code in the relocation buffer rather than at the target + virtual void setRelocationBuffer(unsigned char * BufferBegin) { + RelocationBuffer = BufferBegin; + } + }; +} + +#endif diff --git a/lib/Target/X86/X86NaClRewriteFinalPass.cpp b/lib/Target/X86/X86NaClRewriteFinalPass.cpp new file mode 100644 index 0000000000..b6276dc583 --- /dev/null +++ b/lib/Target/X86/X86NaClRewriteFinalPass.cpp @@ -0,0 +1,232 @@ +//=== X86NaClRewriteFinalPass.cpp - Expand NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass expands NaCl pseudo-instructions into real instructions. +// This duplicates much of the functionality found in X86MCNaCl.cpp but is +// needed for non-MC JIT, which doesn't use MC. It expands pseudo instructions +// into bundle-locked groups by emitting a BUNDLE_LOCK marker, +// followed by the instructions, followed by a BUNDLE_UNLOCK marker. +// The Code Emitter needs to ensure the alignment as it emits. Additionallly, +// this pass needs to be run last, or the user at least needs to ensure that +// subsequent passes do not reorder or remove any bundled groups. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-jit-sandboxing" +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Function.h" + +using namespace llvm; + +extern cl::opt<int> FlagSfiX86JmpMask; + +namespace { + class X86NaClRewriteFinalPass : public MachineFunctionPass { + public: + static char ID; + X86NaClRewriteFinalPass() : MachineFunctionPass(ID), + kJumpMask(FlagSfiX86JmpMask) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "NaCl Pseudo-instruction expansion"; + } + + private: + const int kJumpMask; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + bool Is64Bit; + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + + void TraceLog(const char *fun, + const MachineBasicBlock &MBB, + const MachineBasicBlock::iterator MBBI) const; + + void RewriteIndirectJump(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool Is64Bit, + bool IsCall); + void RewriteDirectCall(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool Is64Bit); + bool ApplyCommonRewrites(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + }; + + char X86NaClRewriteFinalPass::ID = 0; +} + +void X86NaClRewriteFinalPass::RewriteIndirectJump(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool Is64Bit, + bool IsCall) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + + DEBUG(dbgs() << "rewrite indirect jump " << MBB); + + unsigned reg32 = MI.getOperand(0).getReg(); + unsigned reg64 = getX86SubSuperRegister(reg32, MVT::i64); + + if (IsCall) + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_ALIGN_END)); + + BuildMI(MBB, MBBI, DL, 
TII->get(TargetOpcode::BUNDLE_LOCK)); + + BuildMI(MBB, MBBI, DL, TII->get(X86::AND32ri8)) + .addReg(reg32) + .addReg(reg32) + //.addOperand(MI.getOperand(0))//correct flags, but might be 64bit reg + .addImm(kJumpMask); + + if (Is64Bit) { + BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64rr)) + .addReg(reg64) + .addReg(reg64) + .addReg(X86::R15); + } + + if (IsCall) { + BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::CALL64r : X86::CALL32r)) + .addReg(Is64Bit ? reg64 : reg32); + } else { + BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::JMP64r : X86::JMP32r)) + .addReg(Is64Bit ? reg64 : reg32); + } + + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_UNLOCK)); + MI.eraseFromParent(); + + DEBUG(dbgs() << "done rewrite indirect jump " << MBB); +} + +void X86NaClRewriteFinalPass::RewriteDirectCall(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + bool Is64Bit) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + DEBUG(dbgs() << "rewrite direct call " << MBB); + const MachineOperand &MO = MI.getOperand(0); + // rewrite calls to immediates as indirect calls. + if (MO.isImm()) { + DEBUG(dbgs() << " is immediate " << MO); + // First, rewrite as a move imm->reg + indirect call sequence, + BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32ri)) + .addReg(X86::ECX) + .addOperand(MO); + BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::CALL64r : X86::CALL32r)) + .addReg(X86::ECX); + // Then use RewriteIndirectJump to sandbox it + MachineBasicBlock::iterator I = MBBI; + --I; // I now points at the call instruction + MI.eraseFromParent(); + return RewriteIndirectJump(MBB, I, Is64Bit, true); + } + + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_ALIGN_END)); + + BuildMI(MBB, MBBI, DL, + TII->get(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32)) + .addOperand(MI.getOperand(0)); + + MI.eraseFromParent(); +} + +bool X86NaClRewriteFinalPass::ApplyCommonRewrites(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + switch(Opcode) { + case X86::NACL_CALL32d: + RewriteDirectCall(MBB, MBBI, false); + break; + case X86::NACL_CALL64d: + RewriteDirectCall(MBB, MBBI, true); + break; + case X86::NACL_CALL32r: + RewriteIndirectJump(MBB, MBBI, false, true); + return true; + case X86::NACL_CALL64r: + RewriteIndirectJump(MBB, MBBI, true, true); + return true; + case X86::NACL_JMP32r: + RewriteIndirectJump(MBB, MBBI, false, false); + return true; + case X86::NACL_JMP64r: + RewriteIndirectJump(MBB, MBBI, true, false); + return true; + case X86::NACL_TRAP32: + case X86::NACL_TRAP64: + case X86::NACL_ASPi8: + case X86::NACL_ASPi32: + case X86::NACL_SSPi8: + case X86::NACL_SSPi32: + case X86::NACL_SPADJi32: + case X86::NACL_RESTBPm: + case X86::NACL_RESTBPr: + case X86::NACL_RESTSPm: + case X86::NACL_RESTSPr: + dbgs() << "inst, opcode not handled: " << MI << Opcode; + assert(false && "NaCl Pseudo-inst not handled"); + case X86::NACL_RET32: + case X86::NACL_RET64: + case X86::NACL_RETI32: + assert(false && "Should not get RETs here"); + } + return false; +} + +bool X86NaClRewriteFinalPass::runOnMachineFunction(MachineFunction &MF) { + bool modified = false; + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); + const X86Subtarget *subtarget = &TM->getSubtarget<X86Subtarget>(); + assert(subtarget->isTargetNaCl() && "Target in NaClRewriteFinal is not NaCl"); + + DEBUG(dbgs() << "*************** NaCl Rewrite Final ***************\n"); + DEBUG(dbgs() << " funcnum " << MF.getFunctionNumber() << " " + << 
MF.getFunction()->getName() << "\n"); + + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; ++MFI) { + modified |= runOnMachineBasicBlock(*MFI); + } + + DEBUG(dbgs() << "************* NaCl Rewrite Final Done *************\n"); + return modified; +} + +bool X86NaClRewriteFinalPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI; + MBBI != MBB.end(); MBBI = NextMBBI) { + ++NextMBBI; + if (ApplyCommonRewrites(MBB, MBBI)) { + modified = true; + } + } + return modified; +} + +// return an instance of the pass +namespace llvm { + FunctionPass *createX86NaClRewriteFinalPass() { + return new X86NaClRewriteFinalPass(); + } +} diff --git a/lib/Target/X86/X86NaClRewritePass.cpp b/lib/Target/X86/X86NaClRewritePass.cpp new file mode 100644 index 0000000000..7310dcd77a --- /dev/null +++ b/lib/Target/X86/X86NaClRewritePass.cpp @@ -0,0 +1,762 @@ +//=== X86NaClRewritePAss.cpp - Rewrite instructions for NaCl SFI --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that ensures stores and loads and stack/frame +// pointer addresses are within the NaCl sandbox (for x86-64). +// It also ensures that indirect control flow follows NaCl requirments. +// +// The other major portion of rewriting for NaCl is done in X86InstrNaCl.cpp, +// which is responsible for expanding the NaCl-specific operations introduced +// here and also the intrinsic functions to support setjmp, etc. +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-sandboxing" + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +extern cl::opt<bool> FlagUseZeroBasedSandbox; +cl::opt<bool> FlagRestrictR15("sfi-restrict-r15", + cl::desc("Restrict use of %r15. 
This flag can" + " be turned off for the zero-based" + " sandbox model."), + cl::init(true)); + +namespace { + class X86NaClRewritePass : public MachineFunctionPass { + public: + static char ID; + X86NaClRewritePass() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "NaCl Rewrites"; + } + + private: + + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const X86Subtarget *Subtarget; + bool Is64Bit; + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + + void TraceLog(const char *func, + const MachineBasicBlock &MBB, + const MachineBasicBlock::iterator MBBI) const; + + bool ApplyRewrites(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + bool ApplyStackSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool ApplyMemorySFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool ApplyFrameSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool ApplyControlSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + void PassLightWeightValidator(MachineBasicBlock &MBB); + bool AlignJumpTableTargets(MachineFunction &MF); + }; + + char X86NaClRewritePass::ID = 0; + +} + +static void DumpInstructionVerbose(const MachineInstr &MI); + +static bool IsPushPop(MachineInstr &MI) { + const unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + return false; + case X86::PUSH64r: + case X86::POP64r: + return true; + } +} + +static bool IsStore(MachineInstr &MI) { + return MI.getDesc().mayStore(); +} + +static bool IsLoad(MachineInstr &MI) { + return MI.getDesc().mayLoad(); +} + +static bool IsFrameChange(MachineInstr &MI) { + return MI.modifiesRegister(X86::EBP, NULL) || + MI.modifiesRegister(X86::RBP, NULL); +} + +static bool IsStackChange(MachineInstr &MI) { + return MI.modifiesRegister(X86::ESP, NULL) || + MI.modifiesRegister(X86::RSP, NULL); +} + + +static bool HasControlFlow(const MachineInstr &MI) { + return MI.getDesc().isBranch() || + MI.getDesc().isCall() || + MI.getDesc().isReturn() || + MI.getDesc().isTerminator() || + MI.getDesc().isBarrier(); +} + +static bool IsDirectBranch(const MachineInstr &MI) { + return MI.getDesc().isBranch() && + !MI.getDesc().isIndirectBranch(); +} + +static bool IsRegAbsolute(unsigned Reg) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const bool RestrictR15 = FlagRestrictR15; + assert(UseZeroBasedSandbox || RestrictR15); + return (Reg == X86::RSP || Reg == X86::RBP || + (Reg == X86::R15 && RestrictR15)); +} + +static bool FindMemoryOperand(const MachineInstr &MI, unsigned* index) { + int NumFound = 0; + unsigned MemOp = 0; + for (unsigned i = 0; i < MI.getNumOperands(); ) { + if (isMem(&MI, i)) { + NumFound++; + MemOp = i; + i += X86::AddrNumOperands; + } else { + i++; + } + } + + // Intrinsics and other functions can have mayLoad and mayStore to reflect + // the side effects of those functions. This function is used to find + // explicit memory references in the instruction, of which there are none. 
+ if (NumFound == 0) + return false; + + if (NumFound > 1) + llvm_unreachable("Too many memory operands in instruction!"); + + *index = MemOp; + return true; +} + +static unsigned PromoteRegTo64(unsigned RegIn) { + if (RegIn == 0) + return 0; + unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i64, false); + assert(RegOut != 0); + return RegOut; +} + +static unsigned DemoteRegTo32(unsigned RegIn) { + if (RegIn == 0) + return 0; + unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i32, false); + assert(RegOut != 0); + return RegOut; +} + + +// +// True if this MI restores RSP from RBP with a slight adjustment offset. +// +static bool MatchesSPAdj(const MachineInstr &MI) { + assert (MI.getOpcode() == X86::LEA64r && "Call to MatchesSPAdj w/ non LEA"); + const MachineOperand &DestReg = MI.getOperand(0); + const MachineOperand &BaseReg = MI.getOperand(1); + const MachineOperand &Scale = MI.getOperand(2); + const MachineOperand &IndexReg = MI.getOperand(3); + const MachineOperand &Offset = MI.getOperand(4); + return (DestReg.isReg() && DestReg.getReg() == X86::RSP && + BaseReg.isReg() && BaseReg.getReg() == X86::RBP && + Scale.getImm() == 1 && + IndexReg.isReg() && IndexReg.getReg() == 0 && + Offset.isImm()); +} + +void +X86NaClRewritePass::TraceLog(const char *func, + const MachineBasicBlock &MBB, + const MachineBasicBlock::iterator MBBI) const { + DEBUG(dbgs() << "@" << func << "(" << MBB.getName() << ", " << (*MBBI) << ")\n"); +} + +bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + TraceLog("ApplyStackSFI", MBB, MBBI); + assert(Is64Bit); + MachineInstr &MI = *MBBI; + + if (!IsStackChange(MI)) + return false; + + if (IsPushPop(MI)) + return false; + + if (MI.getDesc().isCall()) + return false; + + unsigned Opc = MI.getOpcode(); + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(DestReg == X86::ESP || DestReg == X86::RSP); + + unsigned NewOpc = 0; + switch (Opc) { + case X86::ADD64ri8 : NewOpc = X86::NACL_ASPi8; break; + case X86::ADD64ri32: NewOpc = X86::NACL_ASPi32; break; + case X86::SUB64ri8 : NewOpc = X86::NACL_SSPi8; break; + case X86::SUB64ri32: NewOpc = X86::NACL_SSPi32; break; + case X86::AND64ri32: NewOpc = X86::NACL_ANDSPi32; break; + } + if (NewOpc) { + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addImm(MI.getOperand(2).getImm()) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + // Promote "MOV ESP, EBP" to a 64-bit move + if (Opc == X86::MOV32rr && MI.getOperand(1).getReg() == X86::EBP) { + MI.getOperand(0).setReg(X86::RSP); + MI.getOperand(1).setReg(X86::RBP); + MI.setDesc(TII->get(X86::MOV64rr)); + Opc = X86::MOV64rr; + } + + // "MOV RBP, RSP" is already safe + if (Opc == X86::MOV64rr && MI.getOperand(1).getReg() == X86::RBP) { + return true; + } + + // Promote 32-bit lea to 64-bit lea (does this ever happen?) 
+ assert(Opc != X86::LEA32r && "Invalid opcode in 64-bit mode!"); + if (Opc == X86::LEA64_32r) { + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned BaseReg = MI.getOperand(1).getReg(); + unsigned Scale = MI.getOperand(2).getImm(); + unsigned IndexReg = MI.getOperand(3).getReg(); + assert(DestReg == X86::ESP); + assert(Scale == 1); + assert(BaseReg == X86::EBP); + assert(IndexReg == 0); + MI.getOperand(0).setReg(X86::RSP); + MI.getOperand(1).setReg(X86::RBP); + MI.setDesc(TII->get(X86::LEA64r)); + Opc = X86::LEA64r; + } + + if (Opc == X86::LEA64r && MatchesSPAdj(MI)) { + const MachineOperand &Offset = MI.getOperand(4); + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_SPADJi32)) + .addImm(Offset.getImm()) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) { + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPr)) + .addReg(DemoteRegTo32(MI.getOperand(1).getReg())) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + if (Opc == X86::MOV32rm) { + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPm)) + .addOperand(MI.getOperand(1)) // Base + .addOperand(MI.getOperand(2)) // Scale + .addOperand(MI.getOperand(3)) // Index + .addOperand(MI.getOperand(4)) // Offset + .addOperand(MI.getOperand(5)) // Segment + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + DumpInstructionVerbose(MI); + llvm_unreachable("Unhandled Stack SFI"); +} + +bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + TraceLog("ApplyFrameSFI", MBB, MBBI); + assert(Is64Bit); + MachineInstr &MI = *MBBI; + + if (!IsFrameChange(MI)) + return false; + + unsigned Opc = MI.getOpcode(); + DebugLoc DL = MI.getDebugLoc(); + + // Handle moves to RBP + if (Opc == X86::MOV64rr) { + assert(MI.getOperand(0).getReg() == X86::RBP); + unsigned SrcReg = MI.getOperand(1).getReg(); + + // MOV RBP, RSP is already safe + if (SrcReg == X86::RSP) + return false; + + // Rewrite: mov %rbp, %rX + // To: naclrestbp %eX, %rZP + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPr)) + .addReg(DemoteRegTo32(SrcReg)) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + MI.eraseFromParent(); + return true; + } + + // Handle memory moves to RBP + if (Opc == X86::MOV64rm) { + assert(MI.getOperand(0).getReg() == X86::RBP); + + // Zero-based sandbox model uses address clipping + if (UseZeroBasedSandbox) + return false; + + // Rewrite: mov %rbp, (...) + // To: naclrestbp (...), %rZP + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm)) + .addOperand(MI.getOperand(1)) // Base + .addOperand(MI.getOperand(2)) // Scale + .addOperand(MI.getOperand(3)) // Index + .addOperand(MI.getOperand(4)) // Offset + .addOperand(MI.getOperand(5)) // Segment + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + MI.eraseFromParent(); + return true; + } + + // Popping onto RBP + // Rewrite to: + // naclrestbp (%rsp), %rZP + // naclasp $8, %rZP + // + // TODO(pdox): Consider rewriting to this instead: + // .bundle_lock + // pop %rbp + // mov %ebp,%ebp + // add %rZP, %rbp + // .bundle_unlock + if (Opc == X86::POP64r) { + assert(MI.getOperand(0).getReg() == X86::RBP); + + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm)) + .addReg(X86::RSP) // Base + .addImm(1) // Scale + .addReg(0) // Index + .addImm(0) // Offset + .addReg(0) // Segment + .addReg(UseZeroBasedSandbox ? 
0 : X86::R15); // rZP + + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi8)) + .addImm(8) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + + MI.eraseFromParent(); + return true; + } + + DumpInstructionVerbose(MI); + llvm_unreachable("Unhandled Frame SFI"); +} + +bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + TraceLog("ApplyControlSFI", MBB, MBBI); + MachineInstr &MI = *MBBI; + + if (!HasControlFlow(MI)) + return false; + + // Direct branches are OK + if (IsDirectBranch(MI)) + return false; + + DebugLoc DL = MI.getDebugLoc(); + unsigned Opc = MI.getOpcode(); + + // Rewrite indirect jump/call instructions + unsigned NewOpc = 0; + switch (Opc) { + // 32-bit + case X86::JMP32r : NewOpc = X86::NACL_JMP32r; break; + case X86::TAILJMPr : NewOpc = X86::NACL_JMP32r; break; + case X86::NACL_CG_CALL32r : NewOpc = X86::NACL_CALL32r; break; + // 64-bit + case X86::NACL_CG_JMP64r : NewOpc = X86::NACL_JMP64r; break; + case X86::NACL_CG_CALL64r : NewOpc = X86::NACL_CALL64r; break; + case X86::NACL_CG_TAILJMPr64 : NewOpc = X86::NACL_JMP64r; break; + } + if (NewOpc) { + MachineInstrBuilder NewMI = + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addOperand(MI.getOperand(0)); + if (Is64Bit) { + NewMI.addReg(UseZeroBasedSandbox ? 0 : X86::R15); + } + MI.eraseFromParent(); + return true; + } + + // EH_RETURN has a single argment which is not actually used directly. + // The argument gives the location where to reposition the stack pointer + // before returning. EmitPrologue takes care of that repositioning. + // So EH_RETURN just ultimately emits a plain "ret". + // RETI returns and pops some number of bytes from the stack. + if (Opc == X86::RET || Opc == X86::EH_RETURN || Opc == X86::EH_RETURN64 || + Opc == X86::RETI) { + // To maintain compatibility with nacl-as, for now we don't emit naclret. + // MI.setDesc(TII->get(Is64Bit ? X86::NACL_RET64 : X86::NACL_RET32)); + if (Is64Bit) { + BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), X86::RCX); + if (Opc == X86::RETI) { + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi32)) + .addOperand(MI.getOperand(0)) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + } + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP64r)) + .addReg(X86::ECX) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + } else { + BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX); + if (Opc == X86::RETI) { + BuildMI(MBB, MBBI, DL, TII->get(X86::ADD32ri), X86::ESP) + .addReg(X86::ESP) + .addOperand(MI.getOperand(0)); + } + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r)) + .addReg(X86::ECX); + } + MI.eraseFromParent(); + return true; + } + + // Rewrite trap + if (Opc == X86::TRAP) { + // To maintain compatibility with nacl-as, for now we don't emit nacltrap. + // MI.setDesc(TII->get(Is64Bit ? X86::NACL_TRAP64 : X86::NACL_TRAP32)); + BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32mi)) + .addReg(Is64Bit && !UseZeroBasedSandbox ? 
X86::R15 : 0) // Base + .addImm(1) // Scale + .addReg(0) // Index + .addImm(0) // Offset + .addReg(0) // Segment + .addImm(0); // Value + MI.eraseFromParent(); + return true; + } + + DumpInstructionVerbose(MI); + llvm_unreachable("Unhandled Control SFI"); +} + +// +// Sandboxes loads and stores (64-bit only) +// +bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + TraceLog("ApplyMemorySFI", MBB, MBBI); + assert(Is64Bit); + MachineInstr &MI = *MBBI; + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + + if (!IsLoad(MI) && !IsStore(MI)) + return false; + + if (IsPushPop(MI)) + return false; + + unsigned MemOp; + if (!FindMemoryOperand(MI, &MemOp)) + return false; + assert(isMem(&MI, MemOp)); + MachineOperand &BaseReg = MI.getOperand(MemOp + 0); + MachineOperand &Scale = MI.getOperand(MemOp + 1); + MachineOperand &IndexReg = MI.getOperand(MemOp + 2); + //MachineOperand &Disp = MI.getOperand(MemOp + 3); + MachineOperand &SegmentReg = MI.getOperand(MemOp + 4); + + // RIP-relative addressing is safe. + if (BaseReg.getReg() == X86::RIP) + return false; + + // Make sure the base and index are 64-bit registers. + IndexReg.setReg(PromoteRegTo64(IndexReg.getReg())); + BaseReg.setReg(PromoteRegTo64(BaseReg.getReg())); + assert(IndexReg.getSubReg() == 0); + assert(BaseReg.getSubReg() == 0); + + bool AbsoluteBase = IsRegAbsolute(BaseReg.getReg()); + bool AbsoluteIndex = IsRegAbsolute(IndexReg.getReg()); + unsigned AddrReg = 0; + + if (AbsoluteBase && AbsoluteIndex) { + llvm_unreachable("Unexpected absolute register pair"); + } else if (AbsoluteBase) { + AddrReg = IndexReg.getReg(); + } else if (AbsoluteIndex) { + assert(!BaseReg.getReg() && "Unexpected base register"); + assert(Scale.getImm() == 1); + AddrReg = 0; + } else { + if (!BaseReg.getReg()) { + // No base, fill in relative. + BaseReg.setReg(UseZeroBasedSandbox ? 0 : X86::R15); + AddrReg = IndexReg.getReg(); + } else if (!UseZeroBasedSandbox) { + // Switch base and index registers if index register is undefined. + // That is do conversions like "mov d(%r,0,0) -> mov d(%r15, %r, 1)". + assert (!IndexReg.getReg() + && "Unexpected index and base register"); + IndexReg.setReg(BaseReg.getReg()); + Scale.setImm(1); + BaseReg.setReg(X86::R15); + AddrReg = IndexReg.getReg(); + } else { + llvm_unreachable( + "Unexpected index and base register"); + } + } + + if (AddrReg) { + assert(!SegmentReg.getReg() && "Unexpected segment register"); + SegmentReg.setReg(X86::PSEUDO_NACL_SEG); + return true; + } + + return false; +} + +bool X86NaClRewritePass::ApplyRewrites(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned Opc = MI.getOpcode(); + + // These direct jumps need their opcode rewritten + // and variable operands removed. 
+ unsigned NewOpc = 0; + switch (Opc) { + case X86::NACL_CG_CALLpcrel32 : NewOpc = X86::NACL_CALL32d; break; + case X86::TAILJMPd : NewOpc = X86::JMP_4; break; + case X86::NACL_CG_TAILJMPd64 : NewOpc = X86::JMP_4; break; + case X86::NACL_CG_CALL64pcrel32: NewOpc = X86::NACL_CALL64d; break; + } + if (NewOpc) { + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addOperand(MI.getOperand(0)); + MI.eraseFromParent(); + return true; + } + + if (Opc == X86::NACL_CG_TLS_addr32) { + // Rewrite to nacltlsaddr32 + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_TLS_addr32)) + .addOperand(MI.getOperand(0)) // Base + .addOperand(MI.getOperand(1)) // Scale + .addOperand(MI.getOperand(2)) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, X86II::MO_TLSGD) + .addOperand(MI.getOperand(4)); // Segment + MI.eraseFromParent(); + return true; + } + + // General Dynamic NaCl TLS model + // http://code.google.com/p/nativeclient/issues/detail?id=1685 + if (Opc == X86::NACL_CG_GD_TLS_addr64) { + + // Rewrite to: + // leaq $sym@TLSGD(%rip), %rdi + // naclcall __tls_get_addr@PLT + BuildMI(MBB, MBBI, DL, TII->get(X86::LEA64r), X86::RDI) + .addReg(X86::RIP) // Base + .addImm(1) // Scale + .addReg(0) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); // Segment + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_CALL64d)) + .addExternalSymbol("__tls_get_addr", X86II::MO_PLT); + MI.eraseFromParent(); + return true; + } + + // Local Exec NaCl TLS Model + if (Opc == X86::NACL_CG_LE_TLS_addr64 || + Opc == X86::NACL_CG_LE_TLS_addr32) { + unsigned CallOpc, LeaOpc, Reg; + // Rewrite to: + // naclcall __nacl_read_tp@PLT + // lea $sym@flag(,%reg), %reg + if (Opc == X86::NACL_CG_LE_TLS_addr64) { + CallOpc = X86::NACL_CALL64d; + LeaOpc = X86::LEA64r; + Reg = X86::RAX; + } else { + CallOpc = X86::NACL_CALL32d; + LeaOpc = X86::LEA32r; + Reg = X86::EAX; + } + BuildMI(MBB, MBBI, DL, TII->get(CallOpc)) + .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT); + BuildMI(MBB, MBBI, DL, TII->get(LeaOpc), Reg) + .addReg(0) // Base + .addImm(1) // Scale + .addReg(Reg) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); // Segment + MI.eraseFromParent(); + return true; + } + + // Initial Exec NaCl TLS Model + if (Opc == X86::NACL_CG_IE_TLS_addr64 || + Opc == X86::NACL_CG_IE_TLS_addr32) { + unsigned CallOpc, AddOpc, Base, Reg; + // Rewrite to: + // naclcall __nacl_read_tp@PLT + // addq sym@flag(%base), %reg + if (Opc == X86::NACL_CG_IE_TLS_addr64) { + CallOpc = X86::NACL_CALL64d; + AddOpc = X86::ADD64rm; + Base = X86::RIP; + Reg = X86::RAX; + } else { + CallOpc = X86::NACL_CALL32d; + AddOpc = X86::ADD32rm; + Base = MI.getOperand(3).getTargetFlags() == X86II::MO_INDNTPOFF ? + 0 : X86::EBX; // EBX for GOTNTPOFF. 
+ Reg = X86::EAX; + } + BuildMI(MBB, MBBI, DL, TII->get(CallOpc)) + .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT); + BuildMI(MBB, MBBI, DL, TII->get(AddOpc), Reg) + .addReg(Reg) + .addReg(Base) + .addImm(1) // Scale + .addReg(0) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); // Segment + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool X86NaClRewritePass::AlignJumpTableTargets(MachineFunction &MF) { + bool Modified = true; + + MF.setAlignment(5); // log2, 32 = 2^5 + + MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + if (JTI != NULL) { + const std::vector<MachineJumpTableEntry> &JT = JTI->getJumpTables(); + for (unsigned i = 0; i < JT.size(); ++i) { + const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; + for (unsigned j = 0; j < MBBs.size(); ++j) { + MBBs[j]->setAlignment(5); + Modified |= true; + } + } + } + return Modified; +} + +bool X86NaClRewritePass::runOnMachineFunction(MachineFunction &MF) { + bool Modified = false; + + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); + Subtarget = &TM->getSubtarget<X86Subtarget>(); + Is64Bit = Subtarget->is64Bit(); + + assert(Subtarget->isTargetNaCl() && "Unexpected target in NaClRewritePass!"); + + DEBUG(dbgs() << "*************** NaCl Rewrite Pass ***************\n"); + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; + ++MFI) { + Modified |= runOnMachineBasicBlock(*MFI); + } + Modified |= AlignJumpTableTargets(MF); + DEBUG(dbgs() << "*************** NaCl Rewrite DONE ***************\n"); + return Modified; +} + +bool X86NaClRewritePass::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Modified = false; + if (MBB.hasAddressTaken()) { + //FIXME: use a symbolic constant or get this value from some configuration + MBB.setAlignment(5); + Modified = true; + } + for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI; + MBBI != MBB.end(); MBBI = NextMBBI) { + ++NextMBBI; + // When one of these methods makes a change, + // it returns true, skipping the others. + if (ApplyRewrites(MBB, MBBI) || + (Is64Bit && ApplyStackSFI(MBB, MBBI)) || + (Is64Bit && ApplyMemorySFI(MBB, MBBI)) || + (Is64Bit && ApplyFrameSFI(MBB, MBBI)) || + ApplyControlSFI(MBB, MBBI)) { + Modified = true; + } + } + return Modified; +} + +static void DumpInstructionVerbose(const MachineInstr &MI) { + dbgs() << MI; + dbgs() << MI.getNumOperands() << " operands:" << "\n"; + for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + const MachineOperand& op = MI.getOperand(i); + dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n"; + } + dbgs() << "\n"; +} + +/// createX86NaClRewritePassPass - returns an instance of the pass. 
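+/// Typical use (as wired up in X86TargetMachine.cpp later in this change):
+///   addPass(createX86NaClRewritePass());
+/// inside addPreEmitPass(), so the rewrite runs just before code emission.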
+namespace llvm { + FunctionPass* createX86NaClRewritePass() { + return new X86NaClRewritePass(); + } +} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 73ac747742..9054345d35 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -54,6 +54,11 @@ cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); +// @LOCALMOD-BEGIN +extern cl::opt<bool> FlagUseZeroBasedSandbox; +extern cl::opt<bool> FlagRestrictR15; +// @LOCALMOD-END + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() @@ -365,6 +370,25 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } + // @LOCALMOD-START + const X86Subtarget& Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const bool RestrictR15 = FlagRestrictR15; + assert(UseZeroBasedSandbox || RestrictR15); + if (Subtarget.isTargetNaCl64()) { + if (RestrictR15) { + Reserved.set(X86::R15); + Reserved.set(X86::R15D); + Reserved.set(X86::R15W); + Reserved.set(X86::R15B); + } + Reserved.set(X86::RBP); + Reserved.set(X86::EBP); + Reserved.set(X86::BP); + Reserved.set(X86::BPL); + } + // @LOCALMOD-END + return Reserved; } @@ -726,6 +750,9 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R14D; case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: return X86::R15D; + // @LOCALMOD. TODO: possibly revert this after LEA .td fixes + case X86::EIP: case X86::RIP: + return X86::EIP; } case MVT::i64: // For 64-bit mode if we've requested a "high" register and the @@ -778,6 +805,9 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R14; case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: return X86::R15; + // @LOCALMOD. 
TODO: possibly revert this after LEA .td fixes + case X86::EIP: case X86::RIP: + return X86::RIP; } } } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index be6282a643..f3bfe9b328 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -270,6 +270,9 @@ def CR15 : X86Reg<"cr15", 15>; // Pseudo index registers def EIZ : X86Reg<"eiz", 4>; def RIZ : X86Reg<"riz", 4>; + +def PSEUDO_NACL_SEG : X86Reg<"nacl", 4>; // @LOCALMOD + //===----------------------------------------------------------------------===// @@ -336,6 +339,10 @@ def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>; def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>; def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>; def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; +// @LOCALMOD-START +def GR32_TC_64: RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, + R8D, R9D, R11D)>; +// @LOCALMOD-END def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 723e50cc18..a102935b4b 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -35,6 +35,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // TODO: Can we allow this optimization for Native Client? + // At the very least, pointer size needs to be fixed below. + return SDValue(); + } + // @LOCALMOD-END + // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); @@ -190,6 +198,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // TODO(pdox): Allow use of the NaCl pseudo-instruction for REP MOV + return SDValue(); + } + // @LOCALMOD-END + /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index d1ed680287..0132f81410 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -160,7 +160,15 @@ const char *X86Subtarget::getBZeroEntry() const { bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { if (In64BitMode) return false; - return isTargetELF() || TM.getRelocationModel() == Reloc::Static; + // @LOCALMOD-BEGIN + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2367 + // For NaCl dynamic linking we do not want to generate a text relocation to + // an absolute address in PIC mode. Such a situation arises from + // test/CodeGen/X86/call-imm.ll with the default implementation. + // For other platforms we retain the default behavior. 
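+  // Put differently: on NaCl a call to an immediate address is only legal
+  // when the relocation model is static, so PIC code reaches the callee
+  // through a symbol or a register instead of an absolute address.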
+ return (isTargetELF() && !isTargetNaCl()) || + TM.getRelocationModel() == Reloc::Static; + // @LOCALMOD-END } void X86Subtarget::AutoDetectSubtargetFeatures() { @@ -416,10 +424,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, "64-bit code requested on a subtarget that doesn't support it!"); // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both - // 32 and 64 bit) and for all 64-bit targets. + // 32 and 64 bit), NaCl and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || + isTargetNaCl() || // @LOCALMOD In64BitMode) stackAlignment = 16; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8bf4cc77f7..0f8cab52f2 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -192,6 +192,9 @@ public: bool is64Bit() const { return In64BitMode; } + // @LOCALMOD + bool has64BitPointers() const { return is64Bit() && !isTargetNaCl(); } + PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 158f9dc066..59c037f296 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -43,6 +43,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, getSubtargetImpl()->isTargetWindows()) ? "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-" "n8:16:32-S32" : + getSubtargetImpl()->isTargetNaCl() ? // @LOCALMOD + "e-p:32:32-s:32-f64:64:64-f32:32:32-f80:128:128-i64:64:64-n8:16:32-S128" : "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" "n8:16:32-S128"), InstrInfo(*this), @@ -60,7 +62,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), - DL("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + DL(getSubtargetImpl()->isTargetNaCl() ? 
// @LOCALMOD + "e-p:32:32-s:64-f64:64:64-f32:32:32-f80:128:128-i64:64:64-" + "n8:16:32:64-S128" : + "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), TSInfo(*this), @@ -192,11 +197,25 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } + // @LOCALMOD-START + if (getX86Subtarget().isTargetNaCl()) { + addPass(createX86NaClRewritePass()); + ShouldPrint = true; + } + // @LOCALMOD-END + return ShouldPrint; } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { + // @LOCALMOD-START + // Add this pass here instead of as a PreEmitPass because this function is + // only called in JIT mode + if (Subtarget.isTargetNaCl()) { + PM.add(createX86NaClRewriteFinalPass()); + } + // @LOCALMOD-END PM.add(createX86JITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 12311a1abf..967ce95d10 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -19,6 +19,9 @@ #include "X86ISelLowering.h" #include "X86FrameLowering.h" #include "X86JITInfo.h" +#ifdef __native_client__ +#include "X86NaClJITInfo.h" +#endif #include "X86SelectionDAGInfo.h" #include "X86Subtarget.h" #include "llvm/Target/TargetMachine.h" @@ -80,7 +83,11 @@ class X86_32TargetMachine : public X86TargetMachine { X86InstrInfo InstrInfo; X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; +#ifdef __native_client__ + X86NaClJITInfo JITInfo; +#else X86JITInfo JITInfo; +#endif ScalarTargetTransformImpl STTI; X86VectorTargetTransformInfo VTTI; public: diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 92aee0dd3f..4f39d68d40 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,6 +9,7 @@ #include "X86TargetObjectFile.h" #include "X86TargetMachine.h" +#include "X86Subtarget.h" // @LOCALMOD #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" @@ -51,3 +52,30 @@ X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } + +// @LOCALMOD-START +// NOTE: this was largely lifted from +// lib/Target/ARM/ARMTargetObjectFile.cpp +// +// The default is .ctors/.dtors while the arm backend uses +// .init_array/.fini_array +// +// Without this the linker defined symbols __fini_array_start and +// __fini_array_end do not have useful values. 
c.f.: +// http://code.google.com/p/nativeclient/issues/detail?id=805 +void TargetLoweringObjectFileNaCl::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); +} +// @LOCALMOD-END diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 2d320c594c..5fac48e57a 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -38,6 +38,13 @@ namespace llvm { virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); }; + // @LOCALMOD-BEGIN + class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF { + public: + virtual void Initialize(MCContext &ctx, const TargetMachine &TM); + }; + // @LOCALMOD-END + } // end namespace llvm #endif diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index de1353e6c1..9fa690971a 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -5,3 +5,4 @@ add_subdirectory(Scalar) add_subdirectory(IPO) add_subdirectory(Vectorize) add_subdirectory(Hello) +add_subdirectory(NaCl) diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index 6716deb9e4..05aefeff9f 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -58,6 +58,15 @@ namespace { continue; if (I->getName() == "llvm.global_ctors") continue; + // @LOCALMOD-BEGIN - this is likely upstreamable + // Note: there will likely be more cases once this + // is exercises more thorougly. + if (I->getName() == "llvm.global_dtors") + continue; + // not observed yet + if (I->hasExternalWeakLinkage()) + continue; + // @LOCALMOD-END } bool Local = I->hasLocalLinkage(); @@ -78,8 +87,15 @@ namespace { if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; + // @LOCALMOD-BEGIN - this is likely upstreamable + // Note: there will likely be more cases once this + // is exercises more thorougly. + // observed for pthread_cancel + if (I->hasExternalWeakLinkage()) + continue; + // @LOCALMOD-END } - + bool Local = I->hasLocalLinkage(); if (Local) I->setVisibility(GlobalValue::HiddenVisibility); diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 5ad6f9111c..4f4c388a92 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1146,8 +1146,10 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { // If we are removing arguments to the function, emit an obnoxious warning. if (FT->getNumParams() < NumActualArgs) { if (!FT->isVarArg()) { - errs() << "WARNING: While resolving call to function '" - << Callee->getName() << "' arguments were dropped!\n"; + if (Callee->getName() != "main") { // @LOCALMOD + errs() << "WARNING: While resolving call to function '" + << Callee->getName() << "' arguments were dropped!\n"; + } } else { // Add all of the arguments in their promoted form to the arg list. 
for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) { diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index f7bca064c7..001ba5d232 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize +subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize NaCl [component_0] type = Group diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 8b1df92fa2..ae03ff32c5 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -8,7 +8,11 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello +PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello NaCl + +ifeq ($(NACL_SANDBOX),1) + PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) +endif include $(LEVEL)/Makefile.config diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt new file mode 100644 index 0000000000..d634ad9655 --- /dev/null +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMTransformsNaCl + ExpandCtors.cpp + ) + +add_dependencies(LLVMTransformsNaCl intrinsics_gen) diff --git a/lib/Transforms/NaCl/ExpandCtors.cpp b/lib/Transforms/NaCl/ExpandCtors.cpp new file mode 100644 index 0000000000..6b8130e4fb --- /dev/null +++ b/lib/Transforms/NaCl/ExpandCtors.cpp @@ -0,0 +1,145 @@ +//===- ExpandCtors.cpp - Convert ctors/dtors to concrete arrays -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts LLVM's special symbols llvm.global_ctors and +// llvm.global_dtors to concrete arrays, __init_array_start/end and +// __fini_array_start/end, that are usable by a C library. +// +// This pass sorts the contents of global_ctors/dtors according to the +// priority values they contain and removes the priority values. +// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/TypeBuilder.h" + +using namespace llvm; + +namespace { + struct ExpandCtors : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ExpandCtors() : ModulePass(ID) { + initializeExpandCtorsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandCtors::ID = 0; +INITIALIZE_PASS(ExpandCtors, "nacl-expand-ctors", + "Hook up constructor and destructor arrays to libc", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + GlobalVariable *Var = M.getNamedGlobal(Name); + if (!Var) { + // This warning can happen in a program that does not use a libc + // and so does not call the functions in __init_array_start or + // __fini_array_end. Such a program might be linked with + // "-nostdlib". 
+ errs() << "Warning: Variable " << Name << " not referenced\n"; + } else { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +struct FuncArrayEntry { + uint64_t priority; + Constant *func; +}; + +static bool compareEntries(FuncArrayEntry Entry1, FuncArrayEntry Entry2) { + return Entry1.priority < Entry2.priority; +} + +static void defineFuncArray(Module &M, const char *LlvmArrayName, + const char *StartSymbol, + const char *EndSymbol) { + std::vector<Constant*> Funcs; + + GlobalVariable *Array = M.getNamedGlobal(LlvmArrayName); + if (Array) { + if (Array->hasInitializer() && !Array->getInitializer()->isNullValue()) { + ConstantArray *InitList = cast<ConstantArray>(Array->getInitializer()); + std::vector<FuncArrayEntry> FuncsToSort; + for (unsigned Index = 0; Index < InitList->getNumOperands(); ++Index) { + ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(Index)); + FuncArrayEntry Entry; + Entry.priority = cast<ConstantInt>(CS->getOperand(0))->getZExtValue(); + Entry.func = CS->getOperand(1); + FuncsToSort.push_back(Entry); + } + + std::sort(FuncsToSort.begin(), FuncsToSort.end(), compareEntries); + for (std::vector<FuncArrayEntry>::iterator Iter = FuncsToSort.begin(); + Iter != FuncsToSort.end(); + ++Iter) { + Funcs.push_back(Iter->func); + } + } + // No code should be referencing global_ctors/global_dtors, + // because this symbol is internal to LLVM. + Array->eraseFromParent(); + } + + Type *FuncTy = FunctionType::get(Type::getVoidTy(M.getContext()), false); + Type *FuncPtrTy = FuncTy->getPointerTo(); + ArrayType *ArrayTy = ArrayType::get(FuncPtrTy, Funcs.size()); + GlobalVariable *NewArray = + new GlobalVariable(M, ArrayTy, /* isConstant= */ true, + GlobalValue::InternalLinkage, + ConstantArray::get(ArrayTy, Funcs)); + setGlobalVariableValue(M, StartSymbol, NewArray); + // We do this last so that LLVM gives NewArray the name + // "__{init,fini}_array_start" without adding any suffixes to + // disambiguate from the original GlobalVariable's name. This is + // not essential -- it just makes the output easier to understand + // when looking at symbols for debugging. + NewArray->setName(StartSymbol); + + // We replace "__{init,fini}_array_end" with the address of the end + // of NewArray. This removes the name "__{init,fini}_array_end" + // from the output, which is not ideal for debugging. Ideally we + // would convert "__{init,fini}_array_end" to being a GlobalAlias + // that points to the end of the array. However, unfortunately LLVM + // does not generate correct code when a GlobalAlias contains a + // GetElementPtr ConstantExpr. 
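+  // Roughly, in IR terms (illustrative, for N collected functions):
+  //   @__{init,fini}_array_start = internal constant [N x void ()*] [...]
+  // and __{init,fini}_array_end is replaced by the constant expression
+  //   getelementptr ([N x void ()*]* @__{init,fini}_array_start, i32 1)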
+ Constant *NewArrayEnd = + ConstantExpr::getGetElementPtr(NewArray, + ConstantInt::get(M.getContext(), + APInt(32, 1))); + setGlobalVariableValue(M, EndSymbol, NewArrayEnd); +} + +bool ExpandCtors::runOnModule(Module &M) { + defineFuncArray(M, "llvm.global_ctors", + "__init_array_start", "__init_array_end"); + defineFuncArray(M, "llvm.global_dtors", + "__fini_array_start", "__fini_array_end"); + return true; +} + +ModulePass *llvm::createExpandCtorsPass() { + return new ExpandCtors(); +} diff --git a/lib/Transforms/NaCl/LLVMBuild.txt b/lib/Transforms/NaCl/LLVMBuild.txt new file mode 100644 index 0000000000..2f1522b3e5 --- /dev/null +++ b/lib/Transforms/NaCl/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/NaCl/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaCl +parent = Transforms +library_name = NaCl +required_libraries = Core diff --git a/lib/Transforms/NaCl/Makefile b/lib/Transforms/NaCl/Makefile new file mode 100644 index 0000000000..ecf8db6eae --- /dev/null +++ b/lib/Transforms/NaCl/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/NaCl/Makefile-------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMTransformsNaCl +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b3fc6e338c..06ef4b4a9b 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -32,6 +32,7 @@ add_llvm_library(LLVMScalarOpts SimplifyLibCalls.cpp Sink.cpp TailRecursionElimination.cpp + NaClCcRewrite.cpp ) add_dependencies(LLVMScalarOpts intrinsics_gen) diff --git a/lib/Transforms/Scalar/NaClCcRewrite.cpp b/lib/Transforms/Scalar/NaClCcRewrite.cpp new file mode 100644 index 0000000000..5eace7f39d --- /dev/null +++ b/lib/Transforms/Scalar/NaClCcRewrite.cpp @@ -0,0 +1,1053 @@ +//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements calling convention rewrite for Native Client to ensure +// compatibility between pnacl and gcc generated code when calling +// ppapi interface functions. 
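+// For example (illustrative, based on the x86-64 byval rule below): a
+// struct PP_Var passed by value is rewritten from a byval pointer argument
+// into two i64 register arguments (rule "s(iis(d))" => "ll"), matching how
+// gcc passes the same struct on x86-64.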
+//===----------------------------------------------------------------------===//
+
+
+// Major TODOs:
+// * dealing with vararg
+//   (We should exclude all vararg functions and calls to them from rewrites)
+
+#define DEBUG_TYPE "naclcc"
+
+#include "llvm/Argument.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constant.h"
+#include "llvm/DataLayout.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Transforms/Scalar.h"
+
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+
+cl::opt<bool> FlagEnableCcRewrite(
+  "nacl-cc-rewrite",
+  cl::desc("enable NaCl CC rewrite"));
+}
+
+namespace {
+
+// This represents a rule for rewriting types
+struct TypeRewriteRule {
+  const char* src;   // type pattern we are trying to match
+  const char* dst;   // replacement type
+  const char* name;  // name of the rule for diagnosis
+};
+
+// Note: all rules must be well-formed
+// * parentheses must match
+// * TODO: add verification for this
+
+// Legend:
+// s(): struct (also used for unions)
+// c:   char (= 8 bit int) (only allowed for src)
+// i:   32 bit int
+// l:   64 bit int
+// f:   32 bit float
+// d:   64 bit float (= double)
+// p:   untyped pointer (only allowed for src)
+// P(): typed pointer (currently not used, only allowed for src)
+// F:   generic function type (only allowed for src)
+
+// The X8664 Rewrite rules are also subject to
+// register constraints, c.f.: section 3.2.3
+// http://www.x86-64.org/documentation/abi.pdf
+// (roughly) for X8664: up to 2 regs per struct can be used for struct passing
+// and up to 2 regs for struct returns
+// The rewrite rules are straightforward except for: s(iis(d)) => ll
+// which would be straightforward if the frontend had lowered the union inside
+// of PP_Var to s(l) instead of s(d), yielding: s(iis(l)) => ll
+TypeRewriteRule ByvalRulesX8664[] = {
+  {"s(iis(d))", "ll", "PP_Var"},
+  {"s(pp)",     "l",  "PP_ArrayOutput"},
+  {"s(ppi)",    "li", "PP_CompletionCallback"},
+  {0, 0, 0},
+};
+
+TypeRewriteRule SretRulesX8664[] = {
+  // Note: for srets, multireg returns are modeled as struct returns
+  {"s(iis(d))", "s(ll)", "PP_Var"},
+  {"s(ff)",     "d",     "PP_FloatPoint"},
+  {"s(ii)",     "l",     "PP_Point"},
+  {"s(pp)",     "l",     "PP_ArrayOutput"},
+  {0, 0, 0},
+};
+
+// for ARM: up to 4 regs can be used for struct passing
+// and up to 2 float regs for struct returns
+TypeRewriteRule ByvalRulesARM[] = {
+  {"s(iis(d))", "ll",  "PP_Var"},
+  {"s(ppi)",    "iii", "PP_CompletionCallback"},
+  {"s(pp)",     "ii",  "PP_ArrayOutput"},
+  {0, 0, 0},
+};
+
+TypeRewriteRule SretRulesARM[] = {
+  // Note: for srets, multireg returns are modeled as struct returns
+  {"s(ff)", "s(ff)", "PP_FloatPoint"},
+  {0, 0, 0},
+};
+
+// Helper class to model Register Usage as required by
+// the x86-64 calling conventions
+class RegUse {
+  uint32_t n_int_;
+  uint32_t n_float_;
+
+ public:
+  RegUse(uint32_t n_int=0, uint32_t n_float=0) :
+    n_int_(n_int), n_float_(n_float) {}
+
+  static RegUse OneIntReg() { return RegUse(1, 0); }
+  static RegUse OnePointerReg() { return RegUse(1, 0); }
+  static RegUse OneFloatReg() { return RegUse(0, 1); }
+
+  RegUse operator+(RegUse other) const {
+    return RegUse(n_int_ + other.n_int_, n_float_ + other.n_float_); }
+  RegUse operator-(RegUse other) const {
+    return RegUse(n_int_ - other.n_int_, n_float_ - other.n_float_); }
+  bool operator==(RegUse other) const {
+    return n_int_ == other.n_int_ && n_float_ == other.n_float_; }
+  bool operator!=(RegUse other) const {
+    return n_int_ != other.n_int_ || n_float_ != other.n_float_; }
+  bool operator<=(RegUse other) const {
+    return n_int_ <= other.n_int_ && n_float_ <= other.n_float_; }
+  bool operator<(RegUse other) const {
+    return n_int_ < other.n_int_ && n_float_ < other.n_float_; }
+  bool operator>=(RegUse other) const {
+    return n_int_ >= other.n_int_ && n_float_ >= other.n_float_; }
+  bool operator>(RegUse other) const {
+    return n_int_ > other.n_int_ && n_float_ > other.n_float_; }
+  RegUse& operator+=(const RegUse& other) {
+    n_int_ += other.n_int_; n_float_ += other.n_float_; return *this;}
+  RegUse& operator-=(const RegUse& other) {
+    n_int_ -= other.n_int_; n_float_ -= other.n_float_; return *this;}
+
+  friend raw_ostream& operator<<(raw_ostream &O, const RegUse& reg);
+};
+
+raw_ostream& operator<<(raw_ostream &O, const RegUse& reg) {
+  O << "(" << reg.n_int_ << ", " << reg.n_float_ << ")";
+  return O;
+}
+
+// TODO: Find a better way to determine the architecture
+const TypeRewriteRule* GetByvalRewriteRulesForTarget(
+    const TargetLowering* tli) {
+  if (!FlagEnableCcRewrite) return 0;
+
+  const TargetMachine &m = tli->getTargetMachine();
+  const StringRef triple = m.getTargetTriple();
+
+  if (0 == triple.find("x86_64")) return ByvalRulesX8664;
+  if (0 == triple.find("i686")) return 0;
+  if (0 == triple.find("armv7a")) return ByvalRulesARM;
+
+  llvm_unreachable("Unknown arch");
+  return 0;
+}
+
+// TODO: Find a better way to determine the architecture
+const TypeRewriteRule* GetSretRewriteRulesForTarget(
+    const TargetLowering* tli) {
+  if (!FlagEnableCcRewrite) return 0;
+
+  const TargetMachine &m = tli->getTargetMachine();
+  const StringRef triple = m.getTargetTriple();
+
+  if (0 == triple.find("x86_64")) return SretRulesX8664;
+  if (0 == triple.find("i686")) return 0;
+  if (0 == triple.find("armv7a")) return SretRulesARM;
+
+  llvm_unreachable("Unknown arch");
+  return 0;
+}
+
+// TODO: Find a better way to determine the architecture
+// Describes the number of registers available for function
+// argument passing which may affect rewrite decisions on
+// some platforms.
+RegUse GetAvailableRegsForTarget(
+    const TargetLowering* tli) {
+  if (!FlagEnableCcRewrite) return RegUse(0, 0);
+
+  const TargetMachine &m = tli->getTargetMachine();
+  const StringRef triple = m.getTargetTriple();
+
+  // integer: RDI, RSI, RDX, RCX, R8, R9
+  // float: XMM0, ..., XMM7
+  if (0 == triple.find("x86_64")) return RegUse(6, 8);
+  // unused
+  if (0 == triple.find("i686")) return RegUse(0, 0);
+  // no constraints enforced here - the backend handles all the details
+  uint32_t max = std::numeric_limits<uint32_t>::max();
+  if (0 == triple.find("armv7a")) return RegUse(max, max);
+
+  llvm_unreachable("Unknown arch");
+  return 0;
+}
+
+// This class represents a bitcode rewrite pass which ensures
+// that all ppapi interfaces are calling convention compatible
+// with gcc. This pass is architecture dependent.
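+// Sketch of the effect on a function signature (names are illustrative):
+//   before: define void @f(%struct.PP_Var* byval %v)
+//   after:  define void @f(i64 %v_split, i64 %v_split1)
+// Call sites are rewritten to match by bitcasting the callee to the new
+// function pointer type.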
+struct NaClCcRewrite : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + const TypeRewriteRule* SretRewriteRules; + const TypeRewriteRule* ByvalRewriteRules; + const RegUse AvailableRegs; + + explicit NaClCcRewrite(const TargetLowering *tli = 0) + : FunctionPass(ID), + SretRewriteRules(GetSretRewriteRulesForTarget(tli)), + ByvalRewriteRules(GetByvalRewriteRulesForTarget(tli)), + AvailableRegs(GetAvailableRegsForTarget(tli)) { + initializeNaClCcRewritePass(*PassRegistry::getPassRegistry()); + } + + // main pass entry point + bool runOnFunction(Function &F); + + private: + void RewriteCallsite(Instruction* call, LLVMContext& C); + void RewriteFunctionPrologAndEpilog(Function& F); +}; + +char NaClCcRewrite::ID = 0; + +// This is only used for dst side of rules +Type* GetElementaryType(char c, LLVMContext& C) { + switch (c) { + case 'i': + return Type::getInt32Ty(C); + case 'l': + return Type::getInt64Ty(C); + case 'd': + return Type::getDoubleTy(C); + case 'f': + return Type::getFloatTy(C); + default: + dbgs() << c << "\n"; + llvm_unreachable("Unknown type specifier"); + return 0; + } +} + +// This is only used for the dst side of a rule +int GetElementaryTypeWidth(char c) { + switch (c) { + case 'i': + case 'f': + return 4; + case 'l': + case 'd': + return 8; + default: + llvm_unreachable("Unknown type specifier"); + return 0; + } +} + +// Check whether a type matches the *src* side pattern of a rewrite rule. +// Note that the pattern parameter is updated during the recursion +bool HasRewriteType(const Type* type, const char*& pattern) { + switch (*pattern++) { + case '\0': + return false; + case ')': + return false; + case 's': // struct and union are currently no distinguished + { + if (*pattern++ != '(') llvm_unreachable("malformed type pattern"); + if (!type->isStructTy()) return false; + // check struct members + const StructType* st = cast<StructType>(type); + for (StructType::element_iterator it = st->element_begin(), + end = st->element_end(); + it != end; + ++it) { + if (!HasRewriteType(*it, pattern)) return false; + } + // ensure we reached the end + int c = *pattern++; + return c == ')'; + } + break; + case 'c': + return type->isIntegerTy(8); + case 'i': + return type->isIntegerTy(32); + case 'l': + return type->isIntegerTy(64); + case 'd': + return type->isDoubleTy(); + case 'f': + return type->isFloatTy(); + case 'F': + return type->isFunctionTy(); + case 'p': // untyped pointer + return type->isPointerTy(); + case 'P': // typed pointer + { + if (*pattern++ != '(') llvm_unreachable("malformed type pattern"); + if (!type->isPointerTy()) return false; + Type* pointee = dyn_cast<PointerType>(type)->getElementType(); + if (!HasRewriteType(pointee, pattern)) return false; + int c = *pattern++; + return c == ')'; + } + default: + llvm_unreachable("Unknown type specifier"); + return false; + } +} + +RegUse RegUseForRewriteRule(const TypeRewriteRule* rule) { + const char* pattern = std::string("C") == rule->dst ? 
rule->src : rule->dst; + RegUse result(0, 0); + while (char c = *pattern++) { + // Note, we only support a subset here, complex types (s, P) + // would require more work + switch (c) { + case 'i': + case 'l': + result += RegUse::OneIntReg(); + break; + case 'd': + case 'f': + result += RegUse::OneFloatReg(); + break; + default: + dbgs() << c << "\n"; + llvm_unreachable("unexpected return type"); + } + } + return result; +} + +// Note, this only has to be accurate for x86-64 and is intentionally +// quite strict so that we know when to add support for new types. +// Ideally, unexpected types would be flagged by a bitcode checker. +RegUse RegUseForType(const Type* t) { + if (t->isPointerTy()) { + return RegUse::OnePointerReg(); + } else if (t->isFloatTy() || t->isDoubleTy()) { + return RegUse::OneFloatReg(); + } else if (t->isIntegerTy()) { + const IntegerType* it = dyn_cast<const IntegerType>(t); + unsigned width = it->getBitWidth(); + // x86-64 assumption here - use "register info" to make this better + if (width <= 64) return RegUse::OneIntReg(); + } + + dbgs() << *const_cast<Type*>(t) << "\n"; + llvm_unreachable("unexpected type in RegUseForType"); +} + +// Match a type against a set of rewrite rules. +// Return the matching rule, if any. +const TypeRewriteRule* MatchRewriteRules( + const Type* type, const TypeRewriteRule* rules) { + if (rules == 0) return 0; + for (; rules->name != 0; ++rules) { + const char* pattern = rules->src; + if (HasRewriteType(type, pattern)) return rules; + } + return 0; +} + +// Same as MatchRewriteRules but "dereference" type first. +const TypeRewriteRule* MatchRewriteRulesPointee(const Type* t, + const TypeRewriteRule* Rules) { + // sret and byval are both modelled as pointers + const PointerType* pointer = dyn_cast<PointerType>(t); + if (pointer == 0) return 0; + + return MatchRewriteRules(pointer->getElementType(), Rules); +} + +// Note, the attributes are not part of the type but are stored +// with the CallInst and/or the Function (if any) +Type* CreateFunctionPointerType(Type* result_type, + std::vector<Type*>& arguments) { + FunctionType* ft = FunctionType::get(result_type, + arguments, + false); + return PointerType::getUnqual(ft); +} + +// Determines whether a function body needs a rewrite +bool FunctionNeedsRewrite(const Function* fun, + const TypeRewriteRule* ByvalRewriteRules, + const TypeRewriteRule* SretRewriteRules, + RegUse available) { + // TODO: can this be detected on indirect callsites as well. 
+ // if we skip the rewrite for the function body + // we also need to skip it at the callsites + // if (F.isVarArg()) return false; + + // Vectors and Arrays are not supported for compatibility + for (Function::const_arg_iterator AI = fun->arg_begin(), AE = fun->arg_end(); + AI != AE; + ++AI) { + const Type* t = AI->getType(); + if (isa<VectorType>(t) || isa<ArrayType>(t)) return false; + } + + for (Function::const_arg_iterator AI = fun->arg_begin(), AE = fun->arg_end(); + AI != AE; + ++AI) { + const Argument& a = *AI; + const Type* t = a.getType(); + // byval and srets are modelled as pointers (to structs) + if (t->isPointerTy()) { + Type* pointee = dyn_cast<PointerType>(t)->getElementType(); + + if (ByvalRewriteRules && a.hasByValAttr()) { + const TypeRewriteRule* rule = + MatchRewriteRules(pointee, ByvalRewriteRules); + if (rule != 0 && RegUseForRewriteRule(rule) <= available) { + return true; + } + } else if (SretRewriteRules && a.hasStructRetAttr()) { + if (0 != MatchRewriteRules(pointee, SretRewriteRules)) { + return true; + } + } + } + available -= RegUseForType(t); + } + return false; +} + +// Used for sret rewrites to determine the new function result type +Type* GetNewReturnType(Type* type, + const TypeRewriteRule* rule, + LLVMContext& C) { + if (std::string("l") == rule->dst || + std::string("d") == rule->dst) { + return GetElementaryType(rule->dst[0], C); + } else if (rule->dst[0] == 's') { + const char* cp = rule->dst + 2; // skip 's(' + std::vector<Type*> fields; + while (*cp != ')') { + fields.push_back(GetElementaryType(*cp, C)); + ++cp; + } + return StructType::get(C, fields, false /* isPacked */); + } else { + dbgs() << *type << " " << rule->name << "\n"; + llvm_unreachable("unexpected return type"); + return 0; + } +} + +// Rewrite sret parameter while rewriting a function +Type* RewriteFunctionSret(Function& F, + Value* orig_val, + const TypeRewriteRule* rule) { + LLVMContext& C = F.getContext(); + BasicBlock& entry = F.getEntryBlock(); + Instruction* before = &(entry.front()); + Type* old_type = orig_val->getType(); + Type* old_pointee = dyn_cast<PointerType>(old_type)->getElementType(); + Type* new_type = GetNewReturnType(old_type, rule, C); + // create a temporary to hold the return value as we no longer pass + // in the pointer + AllocaInst* tmp_ret = new AllocaInst(old_pointee, "result", before); + orig_val->replaceAllUsesWith(tmp_ret); + CastInst* cast_ret = CastInst::CreatePointerCast( + tmp_ret, + PointerType::getUnqual(new_type), + "byval_cast", + before); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); + II != IE; + /* see below */) { + Instruction* inst = II; + // we do decontructive magic below, so advance the iterator here + // (this is still a little iffy) + ++II; + ReturnInst* ret = dyn_cast<ReturnInst>(inst); + if (ret) { + if (ret->getReturnValue() != 0) + llvm_unreachable("expected a void return"); + // load the return value from temporary + Value *ret_val = new LoadInst(cast_ret, "load_result", ret); + // return that loaded value and delete the return instruction + ReturnInst::Create(C, ret_val, ret); + ret->eraseFromParent(); + } + } + } + return new_type; +} + +// Rewrite one byval function parameter while rewriting a function +void FixFunctionByvalsParameter(Function& F, + std::vector<Argument*>& new_arguments, + std::vector<Attributes>& new_attributes, + Value* byval, + const TypeRewriteRule* rule) { + LLVMContext& C = F.getContext(); + BasicBlock& entry = 
F.getEntryBlock(); + Instruction* before = &(entry.front()); + Twine prefix = byval->getName() + "_split"; + Type* t = byval->getType(); + Type* pointee = dyn_cast<PointerType>(t)->getElementType(); + AllocaInst* tmp_param = new AllocaInst(pointee, prefix + "_param", before); + byval->replaceAllUsesWith(tmp_param); + // convert byval poiner to char pointer + Value* base = CastInst::CreatePointerCast( + tmp_param, PointerType::getInt8PtrTy(C), prefix + "_base", before); + + int width = 0; + const char* pattern = rule->dst; + for (int offset = 0; *pattern; ++pattern, offset += width) { + width = GetElementaryTypeWidth(*pattern); + Type* t = GetElementaryType(*pattern, C); + Argument* arg = new Argument(t, prefix, &F); + Type* pt = PointerType::getUnqual(t); + // the code below generates something like: + // <CHAR-PTR> = getelementptr i8* <BASE>, i32 <OFFSET-FROM-BASE> + // <PTR> = bitcast i8* <CHAR-PTR> to <TYPE>* + // store <ARG> <TYPE>* <ELEM-PTR> + ConstantInt* baseOffset = ConstantInt::get(Type::getInt32Ty(C), offset); + Value *v; + v = GetElementPtrInst::Create(base, baseOffset, prefix + "_base_add", before); + v = CastInst::CreatePointerCast(v, pt, prefix + "_cast", before); + v = new StoreInst(arg, v, before); + + new_arguments.push_back(arg); + new_attributes.push_back(Attributes()); + } +} + +// Change function signature to reflect all the rewrites. +// This includes function type/signature and attributes. +void UpdateFunctionSignature(Function &F, + Type* new_result_type, + std::vector<Argument*>& new_arguments, + std::vector<Attributes>& new_attributes) { + DEBUG(dbgs() << "PHASE PROTOTYPE UPDATE\n"); + if (new_result_type) { + DEBUG(dbgs() << "NEW RESULT TYPE: " << *new_result_type << "\n"); + } + // Update function type + FunctionType* old_fun_type = F.getFunctionType(); + std::vector<Type*> new_types; + for (size_t i = 0; i < new_arguments.size(); ++i) { + new_types.push_back(new_arguments[i]->getType()); + } + + FunctionType* new_fun_type = FunctionType::get( + new_result_type ? 
new_result_type : old_fun_type->getReturnType(), + new_types, + false); + F.setType(PointerType::getUnqual(new_fun_type)); + + Function::ArgumentListType& args = F.getArgumentList(); + DEBUG(dbgs() << "PHASE ARGUMENT DEL " << args.size() << "\n"); + while (args.size()) { + Argument* arg = args.begin(); + DEBUG(dbgs() << "DEL " << arg->getArgNo() << " " << arg->getName() << "\n"); + args.remove(args.begin()); + } + + DEBUG(dbgs() << "PHASE ARGUMENT ADD " << new_arguments.size() << "\n"); + for (size_t i = 0; i < new_arguments.size(); ++i) { + Argument* arg = new_arguments[i]; + DEBUG(dbgs() << "ADD " << i << " " << arg->getName() << "\n"); + args.push_back(arg); + } + + DEBUG(dbgs() << "PHASE ATTRIBUTES UPDATE\n"); + std::vector<AttributeWithIndex> new_attributes_vec; + for (size_t i = 0; i < new_attributes.size(); ++i) { + Attributes attr = new_attributes[i]; + if (attr.hasAttributes()) { + new_attributes_vec.push_back(AttributeWithIndex::get(i + 1, attr)); + } + } + Attributes fattr = F.getAttributes().getFnAttributes(); + if (fattr.hasAttributes()) + new_attributes_vec.push_back(AttributeWithIndex::get(~0, fattr)); + F.setAttributes(AttrListPtr::get(new_attributes_vec)); +} + + +void ExtractFunctionArgsAndAttributes(Function& F, + std::vector<Argument*>& old_arguments, + std::vector<Attributes>& old_attributes) { + for (Function::arg_iterator ai = F.arg_begin(), + end = F.arg_end(); + ai != end; + ++ai) { + old_arguments.push_back(ai); + } + + for (size_t i = 0; i < old_arguments.size(); ++i) { + // index zero is for return value attributes + old_attributes.push_back(F.getParamAttributes(i + 1)); + } +} + +// Apply byval or sret rewrites to function body. +void NaClCcRewrite::RewriteFunctionPrologAndEpilog(Function& F) { + + DEBUG(dbgs() << "\nFUNCTION-REWRITE\n"); + + DEBUG(dbgs() << "FUNCTION BEFORE "); + DEBUG(dbgs() << F); + DEBUG(dbgs() << "\n"); + + std::vector<Argument*> new_arguments; + std::vector<Attributes> new_attributes; + std::vector<Argument*> old_arguments; + std::vector<Attributes> old_attributes; + + + // make a copy of everything first as create Argument adds them to the list + ExtractFunctionArgsAndAttributes(F, old_arguments, old_attributes); + + // A non-zero new_result_type indicates an sret rewrite + Type* new_result_type = 0; + + // only the first arg can be "sret" + if (old_attributes.size() > 0 && old_attributes[0].hasAttribute(Attributes::StructRet)) { + const TypeRewriteRule* sret_rule = + MatchRewriteRulesPointee(old_arguments[0]->getType(), SretRewriteRules); + if (sret_rule) { + Argument* arg = old_arguments[0]; + DEBUG(dbgs() << "REWRITING SRET " + << " arg " << arg->getName() << " " << sret_rule->name << "\n"); + new_result_type = RewriteFunctionSret(F, arg, sret_rule); + old_arguments.erase(old_arguments.begin()); + old_attributes.erase(old_attributes.begin()); + } + } + + // now deal with the byval arguments + RegUse available = AvailableRegs; + for (size_t i = 0; i < old_arguments.size(); ++i) { + Argument* arg = old_arguments[i]; + Type* t = arg->getType(); + Attributes attr = old_attributes[i]; + if (attr.hasAttribute(Attributes::ByVal)) { + const TypeRewriteRule* rule = + MatchRewriteRulesPointee(t, ByvalRewriteRules); + if (rule != 0 && RegUseForRewriteRule(rule) <= available) { + DEBUG(dbgs() << "REWRITING BYVAL " + << *t << " arg " << arg->getName() << " " << rule->name << "\n"); + FixFunctionByvalsParameter(F, + new_arguments, + new_attributes, + arg, + rule); + available -= RegUseForRewriteRule(rule); + continue; + } + } + + // fall through 
case - no rewrite is happening + new_arguments.push_back(arg); + new_attributes.push_back(attr); + available -= RegUseForType(t); + } + + UpdateFunctionSignature(F, new_result_type, new_arguments, new_attributes); + + DEBUG(dbgs() << "FUNCTION AFTER "); + DEBUG(dbgs() << F); + DEBUG(dbgs() << "\n"); +} + +// used for T in {CallInst, InvokeInst} +// TODO(robertm): try unifying this code with FunctionNeedsRewrite() +template<class T> bool CallNeedsRewrite( + const Instruction* inst, + const TypeRewriteRule* ByvalRewriteRules, + const TypeRewriteRule* SretRewriteRules, + RegUse available) { + + const T* call = cast<T>(inst); + // skip non parameter operands at the end + size_t num_params = call->getNumOperands() - (isa<CallInst>(inst) ? 1 : 3); + + // Vectors and Arrays are not supported for compatibility + for (size_t i = 0; i < num_params; ++i) { + Type* t = call->getOperand(i)->getType(); + if (isa<VectorType>(t) || isa<ArrayType>(t)) return false; + } + + for (size_t i = 0; i < num_params; ++i) { + Type* t = call->getOperand(i)->getType(); + // byval and srets are modelled as pointers (to structs) + if (t->isPointerTy()) { + Type* pointee = dyn_cast<PointerType>(t)->getElementType(); + + // param zero is for the return value + if (ByvalRewriteRules && call->paramHasAttr(i + 1, Attributes::ByVal)) { + const TypeRewriteRule* rule = + MatchRewriteRules(pointee, ByvalRewriteRules); + if (rule != 0 && RegUseForRewriteRule(rule) <= available) { + return true; + } + } else if (SretRewriteRules && + call->paramHasAttr(i + 1, Attributes::StructRet)) { + if (0 != MatchRewriteRules(pointee, SretRewriteRules)) { + return true; + } + } + } + available -= RegUseForType(t); + } + return false; +} + +// This code will load the fields of the byval ptr into scalar variables +// which will then be used as argument when we rewrite the actual call +// instruction. +void PrependCompensationForByvals(std::vector<Value*>& new_operands, + std::vector<Attributes>& new_attributes, + Instruction* call, + Value* byval, + const TypeRewriteRule* rule, + LLVMContext& C) { + // convert byval poiner to char pointer + Value* base = CastInst::CreatePointerCast( + byval, PointerType::getInt8PtrTy(C), "byval_base", call); + + int width = 0; + const char* pattern = rule->dst; + for (int offset = 0; *pattern; ++pattern, offset += width) { + width = GetElementaryTypeWidth(*pattern); + Type* t = GetElementaryType(*pattern, C); + Type* pt = PointerType::getUnqual(t); + // the code below generates something like: + // <CHAR-PTR> = getelementptr i8* <BASE>, i32 <OFFSET-FROM-BASE> + // <PTR> = bitcast i8* <CHAR-PTR> to i32* + // <SCALAR> = load i32* <ELEM-PTR> + ConstantInt* baseOffset = ConstantInt::get(Type::getInt32Ty(C), offset); + Value* v; + v = GetElementPtrInst::Create(base, baseOffset, "byval_base_add", call); + v = CastInst::CreatePointerCast(v, pt, "byval_cast", call); + v = new LoadInst(v, "byval_extract", call); + + new_operands.push_back(v); + new_attributes.push_back(Attributes()); + } +} + +// Note: this will only be called if we expect a rewrite to occur +void CallsiteFixupSrets(Instruction* call, + Value* sret, + Type* new_type, + const TypeRewriteRule* rule) { + const char* pattern = rule->dst; + Instruction* next; + if (isa<CallInst>(call)) { + next = call->getNextNode(); + } else if (isa<InvokeInst>(call)) { + // if this scheme turns out to be too simplistic (i.e. asserts fire) + // we need to introduce a new basic block for the compensation code. 
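+    // Descriptive note: the store that copies the returned aggregate back
+    // into the original sret slot is placed before the first non-PHI
+    // instruction of the normal destination, which is only safe while that
+    // block has a single predecessor (checked below).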
+ BasicBlock* normal = dyn_cast<InvokeInst>(call)->getNormalDest(); + if (!normal->getSinglePredecessor()) { + llvm_unreachable("unexpected invoke normal bb"); + } + next = normal->getFirstNonPHI(); + } else { + llvm_unreachable("unexpected call instruction"); + } + + if (next == 0) { + llvm_unreachable("unexpected missing next instruction"); + } + + if (pattern[0] == 's' || + std::string("l") == pattern || + std::string("d") == pattern) { + Type* pt = PointerType::getUnqual(new_type); + Value* cast = CastInst::CreatePointerCast(sret, pt, "cast", next); + new StoreInst(call, cast, next); + } else { + dbgs() << rule->name << "\n"; + llvm_unreachable("unexpected return type at fix up"); + } +} + +void ExtractOperandsAndAttributesFromCallInst( + CallInst* call, + std::vector<Value*>& operands, + std::vector<Attributes>& attributes) { + + AttrListPtr PAL = call->getAttributes(); + // last operand is: function + for (size_t i = 0; i < call->getNumOperands() - 1; ++i) { + operands.push_back(call->getArgOperand(i)); + // index zero is for return value attributes + attributes.push_back(PAL.getParamAttributes(i + 1)); + } +} + +// Note: this differs from the one above in the loop bounds +void ExtractOperandsAndAttributesFromeInvokeInst( + InvokeInst* call, + std::vector<Value*>& operands, + std::vector<Attributes>& attributes) { + AttrListPtr PAL = call->getAttributes(); + // last three operands are: function, bb-normal, bb-exception + for (size_t i = 0; i < call->getNumOperands() - 3; ++i) { + operands.push_back(call->getArgOperand(i)); + // index zero is for return value attributes + attributes.push_back(PAL.getParamAttributes(i + 1)); + } +} + + +Instruction* ReplaceCallInst(CallInst* call, + Type* function_pointer, + std::vector<Value*>& new_operands, + std::vector<Attributes>& new_attributes) { + Value* v = CastInst::CreatePointerCast( + call->getCalledValue(), function_pointer, "fp_cast", call); + CallInst* new_call = CallInst::Create(v, new_operands, "", call); + // NOTE: tail calls may be ruled out but byval/sret, should we assert this? + // TODO: did wid forget to clone anything else? 
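+  // Descriptive note: the properties carried over explicitly are the
+  // tail-call flag, the calling convention, and the per-argument
+  // attributes; everything else belongs to the freshly built CallInst.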
+ new_call->setTailCall(call->isTailCall()); + new_call->setCallingConv(call->getCallingConv()); + for (size_t i = 0; i < new_attributes.size(); ++i) { + // index zero is for return value attributes + new_call->addAttribute(i + 1, new_attributes[i]); + } + return new_call; +} + +Instruction* ReplaceInvokeInst(InvokeInst* call, + Type* function_pointer, + std::vector<Value*>& new_operands, + std::vector<Attributes>& new_attributes) { + Value* v = CastInst::CreatePointerCast( + call->getCalledValue(), function_pointer, "fp_cast", call); + InvokeInst* new_call = InvokeInst::Create(v, + call->getNormalDest(), + call->getUnwindDest(), + new_operands, + "", + call); + for (size_t i = 0; i < new_attributes.size(); ++i) { + // index zero is for return value attributes + new_call->addAttribute(i + 1, new_attributes[i]); + } + return new_call; +} + + +void NaClCcRewrite::RewriteCallsite(Instruction* call, LLVMContext& C) { + BasicBlock* BB = call->getParent(); + + DEBUG(dbgs() << "\nCALLSITE-REWRITE\n"); + DEBUG(dbgs() << "CALLSITE BB BEFORE " << *BB); + DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << *call << "\n"); + if (isa<InvokeInst>(call)) { + DEBUG(dbgs() << "\n" << *(dyn_cast<InvokeInst>(call)->getNormalDest())); + } + + // new_result(_type) is only relevent if an sret is rewritten + // whish is indicated by sret_rule != 0 + const TypeRewriteRule* sret_rule = 0; + Type* new_result_type = call->getType(); + // This is the sret which was originally passed in as the first arg. + // After the rewrite we simply copy the function result into it. + Value* new_result = 0; + + std::vector<Value*> old_operands; + std::vector<Attributes> old_attributes; + if (isa<CallInst>(call)) { + ExtractOperandsAndAttributesFromCallInst( + cast<CallInst>(call), old_operands, old_attributes); + } else if (isa<InvokeInst>(call)) { + ExtractOperandsAndAttributesFromeInvokeInst( + cast<InvokeInst>(call), old_operands, old_attributes); + } else { + llvm_unreachable("Unexpected instruction type"); + } + + // handle sret (just the book-keeping, 'new_result' is dealt with below) + // only the first arg can be "sret" + if (old_attributes[0].hasAttribute(Attributes::StructRet)) { + sret_rule = MatchRewriteRulesPointee( + old_operands[0]->getType(), SretRewriteRules); + if (sret_rule) { + new_result_type = + GetNewReturnType(old_operands[0]->getType(), sret_rule, C); + new_result = old_operands[0]; + old_operands.erase(old_operands.begin()); + old_attributes.erase(old_attributes.begin()); + } + } + + // handle byval + std::vector<Value*> new_operands; + std::vector<Attributes> new_attributes; + RegUse available = AvailableRegs; + + for (size_t i = 0; i < old_operands.size(); ++i) { + Value *operand = old_operands[i]; + Type* t = operand->getType(); + Attributes attr = old_attributes[i]; + + if (attr.hasAttribute(Attributes::ByVal)) { + const TypeRewriteRule* rule = + MatchRewriteRulesPointee(t, ByvalRewriteRules); + if (rule != 0 && RegUseForRewriteRule(rule) <= available) { + DEBUG(dbgs() << "REWRITING BYVAL " + << *t << " arg " << i << " " << rule->name << "\n"); + PrependCompensationForByvals(new_operands, + new_attributes, + call, + operand, + rule, + C); + available -= RegUseForRewriteRule(rule); + continue; + } + } + + // fall through case - no rewrite is happening + new_operands.push_back(operand); + new_attributes.push_back(attr); + available -= RegUseForType(t); + } + + // Note, this code is tricky. + // Initially we used a much more elaborate scheme introducing + // new function declarations for direct calls. 
+ // This simpler scheme, however, works for both direct and + // indirect calls + // We transform (here the direct case): + // call void @result_PP_FloatPoint(%struct.PP_FloatPoint* sret %sret) + // into + // %fp_cast = bitcast void (%struct.PP_FloatPoint*)* + // @result_PP_FloatPoint to %struct.PP_FloatPoint ()* + // %result = call %struct.PP_FloatPoint %fp_cast() + // + std::vector<Type*> new_arg_types; + for (size_t i = 0; i < new_operands.size(); ++i) { + new_arg_types.push_back(new_operands[i]->getType()); + } + + DEBUG(dbgs() << "REWRITE CALL INSTRUCTION\n"); + Instruction* new_call = 0; + if (isa<CallInst>(call)) { + new_call = ReplaceCallInst( + cast<CallInst>(call), + CreateFunctionPointerType(new_result_type, new_arg_types), + new_operands, + new_attributes); + } else if (isa<InvokeInst>(call)) { + new_call = ReplaceInvokeInst( + cast<InvokeInst>(call), + CreateFunctionPointerType(new_result_type, new_arg_types), + new_operands, + new_attributes); + } else { + llvm_unreachable("Unexpected instruction type"); + } + + // We prepended the new call, now get rid of the old one. + // If we did not change the return type, there may be consumers + // of the result which must be redirected. + if (!sret_rule) { + call->replaceAllUsesWith(new_call); + } + call->eraseFromParent(); + + // Add compensation codes for srets if necessary + if (sret_rule) { + DEBUG(dbgs() << "REWRITING SRET " << sret_rule->name << "\n"); + CallsiteFixupSrets(new_call, new_result, new_result_type, sret_rule); + } + + DEBUG(dbgs() << "CALLSITE BB AFTER" << *BB); + DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << *new_call << "\n"); + if (isa<InvokeInst>(call)) { + DEBUG(dbgs() << "\n" << *(dyn_cast<InvokeInst>(call)->getNormalDest())); + } +} + +bool NaClCcRewrite::runOnFunction(Function &F) { + // No rules - no action + if (ByvalRewriteRules == 0 && SretRewriteRules == 0) return false; + + bool Changed = false; + + if (FunctionNeedsRewrite(&F, ByvalRewriteRules, SretRewriteRules, AvailableRegs)) { + DEBUG(dbgs() << "FUNCTION NEEDS REWRITE " << F.getName() << "\n"); + RewriteFunctionPrologAndEpilog(F); + Changed = true; + } + + // Find all the calls and invokes in F and rewrite them if necessary + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); + II != IE; + /* II updated below */) { + Instruction* inst = II; + // we do decontructive magic below, so advance the iterator here + // (this is still a little iffy) + ++II; + if (isa<InvokeInst>(inst) || isa<CallInst>(inst)) { + // skip calls to llvm.dbg.declare, etc. 
+ if (isa<IntrinsicInst>(inst)) continue; + + if (isa<CallInst>(inst) && + !CallNeedsRewrite<CallInst> + (inst, ByvalRewriteRules, SretRewriteRules, AvailableRegs)) continue; + + if (isa<InvokeInst>(inst) && + !CallNeedsRewrite<InvokeInst> + (inst, ByvalRewriteRules, SretRewriteRules, AvailableRegs)) continue; + + RewriteCallsite(inst, F.getContext()); + Changed = true; + } + } + } + return Changed; +} + +} // end anonymous namespace + + +INITIALIZE_PASS(NaClCcRewrite, "naclcc", "NaCl CC Rewriter", false, false) + +FunctionPass *llvm::createNaClCcRewritePass(const TargetLowering *tli) { + return new NaClCcRewrite(tli); +} diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index c428b889c3..ad7a872b1f 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -45,6 +45,116 @@ void GlobalValue::destroyConstant() { llvm_unreachable("You can't GV->destroyConstant()!"); } +// @LOCALMOD-BEGIN + +// Extract the version information from GV. +static void ExtractVersion(const GlobalValue *GV, + StringRef *Name, + StringRef *Ver, + bool *IsDefault) { + // The version information is stored in the GlobalValue's name, e.g.: + // + // GV Name Name Ver IsDefault + // ------------------------------------ + // foo@@V1 --> foo V1 true + // bar@V2 --> bar V2 false + // baz --> baz false + + StringRef GVName = GV->getName(); + size_t atpos = GVName.find("@"); + if (atpos == StringRef::npos) { + *Name = GVName; + *Ver = ""; + *IsDefault = false; + return; + } + *Name = GVName.substr(0, atpos); + ++atpos; + if (atpos < GVName.size() && GVName[atpos] == '@') { + *IsDefault = true; + ++atpos; + } else { + *IsDefault = false; + } + *Ver = GVName.substr(atpos); +} + +// Set the version information on GV. +static void SetVersion(Module *M, + GlobalValue *GV, + StringRef Ver, + bool IsDefault) { + StringRef Name; + StringRef PrevVersion; + bool PrevIsDefault; + ExtractVersion(GV, &Name, &PrevVersion, &PrevIsDefault); + + // If this symbol already has a version, make sure it matches. + if (!PrevVersion.empty()) { + if (!PrevVersion.equals(Ver) || PrevIsDefault != IsDefault) { + llvm_unreachable("Trying to override symbol version info!"); + } + return; + } + // If there's no version to set, there's nothing to do. + if (Ver.empty()) + return; + + // Make sure the versioned symbol name doesn't already exist. + std::string NewName = Name.str() + (IsDefault ? "@@" : "@") + Ver.str(); + if (M->getNamedValue(NewName)) { + // It may make sense to do this as long as one of the globals being + // merged is only a declaration. But since this situation seems to be + // a corner case, for now it is unimplemented. + llvm_unreachable("Merging unversioned global into " + "existing versioned global is unimplemented"); + } + GV->setName(NewName); +} + +StringRef GlobalValue::getUnversionedName() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + return Name; +} + +StringRef GlobalValue::getVersion() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + return Ver; +} + +bool GlobalValue::isDefaultVersion() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + // It is an error to call this function on an unversioned symbol. + assert(!Ver.empty()); + return IsDefaultVersion; +} + +void GlobalValue::setVersionDef(StringRef Version, bool IsDefault) { + // This call only makes sense for definitions. 
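+ // (Illustrative example, not from the original change: on an unversioned
+ // definition @foo, setVersionDef("V1", true) renames it to "foo@@V1" and
+ // setVersionDef("V1", false) to "foo@V1", following the encoding described
+ // in ExtractVersion above; "V1" is just a placeholder version string.)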
+ assert(!isDeclaration()); + SetVersion(Parent, this, Version, IsDefault); +} + +void GlobalValue::setNeeded(StringRef Version, StringRef DynFile) { + // This call makes sense on declarations or + // available-externally definitions. + // TODO(pdox): If this is a definition, should we turn it + // into a declaration here? + assert(isDeclaration() || hasAvailableExternallyLinkage()); + SetVersion(Parent, this, Version, false); + Parent->addNeededRecord(DynFile, this); +} +// @LOCALMOD-END + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalValue) from the GlobalValue Src to this one. void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 5b5176b3c7..a6e335c10c 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/ErrorHandling.h" // @LOCALMOD #include "SymbolTableListTraitsImpl.h" #include <algorithm> #include <cstdarg> @@ -467,3 +468,181 @@ void Module::removeLibrary(StringRef Lib) { return; } } + + +// @LOCALMOD-BEGIN +// TODO(pdox): +// If possible, use actual bitcode records instead of NamedMetadata. +// This is contingent upon whether we can get these changes upstreamed +// immediately, to avoid creating incompatibilities in the bitcode format. + +static std::string +ModuleMetaGet(const Module *module, StringRef MetaName) { + NamedMDNode *node = module->getNamedMetadata(MetaName); + if (node == NULL) + return ""; + assert(node->getNumOperands() == 1); + MDNode *subnode = node->getOperand(0); + assert(subnode->getNumOperands() == 1); + MDString *value = dyn_cast<MDString>(subnode->getOperand(0)); + assert(value != NULL); + return value->getString(); +} + +static void +ModuleMetaSet(Module *module, StringRef MetaName, StringRef ValueStr) { + NamedMDNode *node = module->getNamedMetadata(MetaName); + if (node) + module->eraseNamedMetadata(node); + node = module->getOrInsertNamedMetadata(MetaName); + MDString *value = MDString::get(module->getContext(), ValueStr); + node->addOperand(MDNode::get(module->getContext(), + makeArrayRef(static_cast<Value*>(value)))); +} + +const std::string &Module::getSOName() const { + if (ModuleSOName == "") + ModuleSOName.assign(ModuleMetaGet(this, "SOName")); + return ModuleSOName; +} + +void Module::setSOName(StringRef Name) { + ModuleMetaSet(this, "SOName", Name); + ModuleSOName = Name; +} + +void Module::setOutputFormat(Module::OutputFormat F) { + const char *formatStr; + switch (F) { + case ObjectOutputFormat: formatStr = "object"; break; + case SharedOutputFormat: formatStr = "shared"; break; + case ExecutableOutputFormat: formatStr = "executable"; break; + default: + llvm_unreachable("Unrecognized output format in setOutputFormat()"); + } + ModuleMetaSet(this, "OutputFormat", formatStr); +} + +Module::OutputFormat Module::getOutputFormat() const { + std::string formatStr = ModuleMetaGet(this, "OutputFormat"); + if (formatStr == "" || formatStr == "object") + return ObjectOutputFormat; + else if (formatStr == "shared") + return SharedOutputFormat; + else if (formatStr == "executable") + return ExecutableOutputFormat; + llvm_unreachable("Invalid module compile type in getOutputFormat()"); +} + +void +Module::wrapSymbol(StringRef symName) { + std::string wrapSymName("__wrap_"); + wrapSymName += symName; + + std::string realSymName("__real_"); + realSymName += symName; + + GlobalValue 
*SymGV = getNamedValue(symName); + GlobalValue *WrapGV = getNamedValue(wrapSymName); + GlobalValue *RealGV = getNamedValue(realSymName); + + // Replace uses of "sym" with __wrap_sym. + if (SymGV) { + if (!WrapGV) + WrapGV = cast<GlobalValue>(getOrInsertGlobal(wrapSymName, + SymGV->getType())); + SymGV->replaceAllUsesWith(ConstantExpr::getBitCast(WrapGV, + SymGV->getType())); + } + + // Replace uses of "__real_sym" with "sym". + if (RealGV) { + if (!SymGV) + SymGV = cast<GlobalValue>(getOrInsertGlobal(symName, RealGV->getType())); + RealGV->replaceAllUsesWith(ConstantExpr::getBitCast(SymGV, + RealGV->getType())); + } +} + +// The metadata key prefix for NeededRecords. +static const char *NeededPrefix = "NeededRecord_"; + +void +Module::dumpMeta(raw_ostream &OS) const { + OS << "OutputFormat: "; + switch (getOutputFormat()) { + case Module::ObjectOutputFormat: OS << "object"; break; + case Module::SharedOutputFormat: OS << "shared"; break; + case Module::ExecutableOutputFormat: OS << "executable"; break; + } + OS << "\n"; + OS << "SOName: " << getSOName() << "\n"; + for (Module::lib_iterator L = lib_begin(), + E = lib_end(); + L != E; ++L) { + OS << "NeedsLibrary: " << (*L) << "\n"; + } + std::vector<NeededRecord> NList; + getNeededRecords(&NList); + for (unsigned i = 0; i < NList.size(); ++i) { + const NeededRecord &NR = NList[i]; + OS << StringRef(NeededPrefix) << NR.DynFile << ": "; + for (unsigned j = 0; j < NR.Symbols.size(); ++j) { + if (j != 0) + OS << " "; + OS << NR.Symbols[j]; + } + OS << "\n"; + } +} + +void Module::addNeededRecord(StringRef DynFile, GlobalValue *GV) { + if (DynFile.empty()) { + // We never resolved this symbol, even after linking. + // This should only happen in a shared object. + // It is safe to ignore this symbol, and let the dynamic loader + // figure out where it comes from. + return; + } + std::string Key = NeededPrefix; + Key += DynFile; + // Get the node for this file. + NamedMDNode *Node = getOrInsertNamedMetadata(Key); + // Add this global value's name to the list. + MDString *value = MDString::get(getContext(), GV->getName()); + Node->addOperand(MDNode::get(getContext(), + makeArrayRef(static_cast<Value*>(value)))); +} + +// Get the NeededRecord for SOName. +// Returns an empty NeededRecord if there was no metadata found. +static void getNeededRecordFor(const Module *M, + StringRef SOName, + Module::NeededRecord *NR) { + NR->DynFile = SOName; + NR->Symbols.clear(); + + std::string Key = NeededPrefix; + Key += SOName; + NamedMDNode *Node = M->getNamedMetadata(Key); + if (!Node) + return; + + for (unsigned k = 0; k < Node->getNumOperands(); ++k) { + // Insert the symbol name. + const MDString *SymName = + dyn_cast<MDString>(Node->getOperand(k)->getOperand(0)); + NR->Symbols.push_back(SymName->getString()); + } +} + +// Place the complete list of needed records in NeededOut. +void Module::getNeededRecords(std::vector<NeededRecord> *NeededOut) const { + // Iterate through the libraries needed, grabbing each NeededRecord. + for (lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) { + NeededRecord NR; + getNeededRecordFor(this, *I, &NR); + NeededOut->push_back(NR); + } +} +// @LOCALMOD-END
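+// (Illustrative sketch, not part of the original change: with this scheme a
+// module-level string such as the SOName is stored as a one-operand named
+// metadata node, roughly
+//   !SOName = !{!0}
+//   !0 = metadata !{metadata !"libfoo.so"}
+// and each NeededRecord_<file> node carries one MDString operand per needed
+// symbol; "libfoo.so" is a made-up name used only for illustration.)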
\ No newline at end of file diff --git a/lib/Wrap/LLVMBuild.txt b/lib/Wrap/LLVMBuild.txt new file mode 100644 index 0000000000..8750711338 --- /dev/null +++ b/lib/Wrap/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/Wrap/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Wrap +parent = Libraries diff --git a/lib/Wrap/Makefile b/lib/Wrap/Makefile new file mode 100644 index 0000000000..79aa2b3531 --- /dev/null +++ b/lib/Wrap/Makefile @@ -0,0 +1,14 @@ +##===- lib/Linker/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMWrap +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Wrap/bitcode_wrapperer.cpp b/lib/Wrap/bitcode_wrapperer.cpp new file mode 100644 index 0000000000..eeb2825793 --- /dev/null +++ b/lib/Wrap/bitcode_wrapperer.cpp @@ -0,0 +1,355 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +#include "llvm/Wrap/bitcode_wrapperer.h" + +#include <stdio.h> +#include <sys/stat.h> + +using std::vector; + +// The number of bytes in a 32 bit integer. +static const uint32_t kWordSize = 4; + +// Number of LLVM-defined fixed fields in the header. +static const uint32_t kLLVMFields = 4; + +// Total number of fixed fields in the header. +static const uint32_t kFixedFields = 7; + +// The magic number that must exist for bitcode wrappers. +static const uint32_t kWrapperMagicNumber = 0x0B17C0DE; + +// The version number associated with a wrapper file. +// Note: llvm currently only allows the value 0. When this changes, +// we should consider making this a command line option. +static const uint32_t kLLVMVersionNumber = 0; + +// Fields defined by Android bitcode header. +static const uint32_t kAndroidHeaderVersion = 0; +static const uint32_t kAndroidTargetAPI = 0; +static const uint32_t kAndroidDefaultCompilerVersion = 0; +static const uint32_t kAndroidDefaultOptimizationLevel = 3; + +// PNaCl bitcode version number. +static const uint32_t kPnaclBitcodeVersion = 0; + +// Max size for variable fields. Currently only used for writing them +// out to files (the parsing works for arbitrary sizes). 
+static const size_t kMaxVariableFieldSize = 256; + +BitcodeWrapperer::BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile) + : infile_(infile), + outfile_(outfile), + buffer_size_(0), + cursor_(0), + infile_at_eof_(false), + infile_bc_offset_(0), + wrapper_bc_offset_(0), + wrapper_bc_size_(0), + android_header_version_(kAndroidHeaderVersion), + android_target_api_(kAndroidTargetAPI), + pnacl_bc_version_(0), + error_(false) { + buffer_.resize(kBitcodeWrappererBufferSize); + if (IsInputBitcodeWrapper()) { + ParseWrapperHeader(); + } else if (IsInputBitcodeFile()) { + wrapper_bc_offset_ = kWordSize * kFixedFields; + wrapper_bc_size_ = GetInFileSize(); + } else { + fprintf(stderr, "Error: input file is not a bitcode file.\n"); + error_ = true; + } +} + +BitcodeWrapperer::~BitcodeWrapperer() { + for(size_t i = 0; i < variable_field_data_.size(); i++) { + delete [] variable_field_data_[i]; + } +} + + +void BitcodeWrapperer::ClearBuffer() { + buffer_size_ = 0; + cursor_ = 0; + infile_at_eof_ = false; +} + +bool BitcodeWrapperer::Seek(uint32_t pos) { + if (infile_ != NULL && infile_->Seek(pos)) { + ClearBuffer(); + return true; + } + return false; +} + +bool BitcodeWrapperer::CanReadWord() { + if (GetBufferUnreadBytes() < kWordSize) { + FillBuffer(); + return GetBufferUnreadBytes() >= kWordSize; + } else { + return true; + } +} + +void BitcodeWrapperer::FillBuffer() { + if (cursor_ > 0) { + // Before filling, move any remaining bytes to the + // front of the buffer. This allows us to assume + // that after the call to FillBuffer, readable + // text is contiguous. + if (cursor_ < buffer_size_) { + size_t i = 0; + while (cursor_ < buffer_size_) { + buffer_[i++] = buffer_[cursor_++]; + } + cursor_ = 0; + buffer_size_ = i; + } + } else { + // Assume the buffer contents have been used, + // and we want to completely refill it. + buffer_size_ = 0; + } + + // If we don't have an input, we can't refill the buffer at all. + if (infile_ == NULL) { + return; + } + + // Now fill in remaining space. + size_t needed = buffer_.size() - buffer_size_; + + while (buffer_.size() > buffer_size_) { + int actually_read = infile_->Read(&buffer_[buffer_size_], needed); + if (infile_->AtEof()) { + infile_at_eof_ = true; + } + if (actually_read) { + buffer_size_ += actually_read; + needed -= actually_read; + } else if (infile_at_eof_) { + break; + } + } +} + +bool BitcodeWrapperer::ReadWord(uint32_t& word) { + if (!CanReadWord()) return false; + word = (((uint32_t) BufferLookahead(0)) << 0) + | (((uint32_t) BufferLookahead(1)) << 8) + | (((uint32_t) BufferLookahead(2)) << 16) + | (((uint32_t) BufferLookahead(3)) << 24); + cursor_ += kWordSize; + return true; +} + +bool BitcodeWrapperer::WriteWord(uint32_t value) { + uint8_t buffer[kWordSize]; + buffer[3] = (value >> 24) & 0xFF; + buffer[2] = (value >> 16) & 0xFF; + buffer[1] = (value >> 8) & 0xFF; + buffer[0] = (value >> 0) & 0xFF; + return outfile_->Write(buffer, kWordSize); +} + +bool BitcodeWrapperer::WriteVariableFields() { + // This buffer may have to be bigger if we start using the fields + // for larger things. 
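+ // (Rough sketch of the serialized layout, assuming the format implemented
+ // by BCHeaderField::Write: two 32-bit fixed subfields, the field ID and the
+ // data length, followed by the raw field data, so GetTotalSize() is
+ // 2 * sizeof(FixedSubfield) plus the data size.)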
+ uint8_t buffer[kMaxVariableFieldSize]; + for (vector<BCHeaderField>::iterator it = header_fields_.begin(); + it != header_fields_.end(); ++it) { + if (!it->Write(buffer, kMaxVariableFieldSize) || + !outfile_->Write(buffer, it->GetTotalSize())) { + return false; + } + } + return true; +} + +bool BitcodeWrapperer::ParseWrapperHeader() { + // Make sure LLVM-defined fields have been parsed + if (!IsInputBitcodeWrapper()) return false; + // Check the android/pnacl fields + if (!ReadWord(android_header_version_) || + !ReadWord(android_target_api_) || !ReadWord(pnacl_bc_version_)) { + fprintf(stderr, "Error: file not long enough to contain header\n"); + return false; + } + if (pnacl_bc_version_ != kPnaclBitcodeVersion) { + fprintf(stderr, "Error: bad PNaCl Bitcode version\n"); + return false; + } + int field_data_total = wrapper_bc_offset_ - kWordSize * kFixedFields; + if (field_data_total > 0) { + // Read in the variable fields. We need to allocate space for the data. + int field_data_read = 0; + + while (field_data_read < field_data_total) { + FillBuffer(); + size_t buffer_needed = BCHeaderField::GetDataSizeFromSerialized( + &buffer_[cursor_]); + if (buffer_needed > buffer_.size()) { + buffer_.resize(buffer_needed + + sizeof(BCHeaderField::FixedSubfield) * 2); + FillBuffer(); + } + variable_field_data_.push_back(new uint8_t[buffer_needed]); + + BCHeaderField field(BCHeaderField::kInvalid, 0, + variable_field_data_.back()); + field.Read(&buffer_[cursor_], buffer_size_); + header_fields_.push_back(field); + size_t field_size = field.GetTotalSize(); + cursor_ += field_size; + field_data_read += field_size; + if (field_data_read > field_data_total) { + // We read too much data, the header is corrupted + fprintf(stderr, "Error: raw bitcode offset inconsistent with " + "variable field data\n"); + return false; + } + } + Seek(0); + } + return true; +} + +bool BitcodeWrapperer::IsInputBitcodeWrapper() { + ResetCursor(); + // First make sure that there are enough words (LLVM header) + // to peek at. + if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) { + FillBuffer(); + if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) return false; + } + + // Now make sure the magic number is right. + uint32_t first_word; + if ((!ReadWord(first_word)) || + (kWrapperMagicNumber != first_word)) return false; + + // Make sure the version is right. + uint32_t second_word; + if ((!ReadWord(second_word)) || + (kLLVMVersionNumber != second_word)) return false; + + // Make sure that the offset and size (for llvm) is defined. + uint32_t bc_offset; + uint32_t bc_size; + if (ReadWord(bc_offset) && + ReadWord(bc_size)) { + // Before returning, save the extracted values. + wrapper_bc_offset_ = bc_offset; + infile_bc_offset_ = bc_offset; + wrapper_bc_size_ = bc_size; + return true; + } + // If reached, unable to read wrapped header. + return false; +} + +bool BitcodeWrapperer::IsInputBitcodeFile() { + ResetCursor(); + // First make sure that there are four bytes to peek at. + if (GetBufferUnreadBytes() < kWordSize) { + FillBuffer(); + if (GetBufferUnreadBytes() < kWordSize) return false; + } + // If reached, Check if first 4 bytes match bitcode + // file magic number. + return (BufferLookahead(0) == 'B') && + (BufferLookahead(1) == 'C') && + (BufferLookahead(2) == 0xc0) && + (BufferLookahead(3) == 0xde); +} + +bool BitcodeWrapperer::BufferCopyInToOut(uint32_t size) { + while (size > 0) { + // Be sure buffer is non-empty before writing. 
+ if (0 == buffer_size_) { + FillBuffer(); + if (0 == buffer_size_) { + return false; + } + } + // copy the buffer to the output file. + size_t block = (buffer_size_ < size) ? buffer_size_ : size; + if (!outfile_->Write(&buffer_[cursor_], block)) return false; + size -= block; + buffer_size_ = 0; + } + // Be sure that there isn't more bytes on the input stream. + FillBuffer(); + return buffer_size_ == 0; +} + +void BitcodeWrapperer::AddHeaderField(BCHeaderField* field) { + vector<BCHeaderField>::iterator it = header_fields_.begin(); + for (; it != header_fields_.end(); ++it) { + // If this field is the same as an existing one, overwrite it. + if (it->getID() == field->getID()) { + wrapper_bc_offset_ += (field->GetTotalSize() - it->GetTotalSize()); + *it = *field; + break; + } + } + if (it == header_fields_.end()) { // there was no match, add a new field + header_fields_.push_back(*field); + wrapper_bc_offset_ += field->GetTotalSize(); + } +} + +bool BitcodeWrapperer::WriteBitcodeWrapperHeader() { + return + // Note: This writes out the 4 word header required by llvm wrapped + // bitcode. + WriteWord(kWrapperMagicNumber) && + WriteWord(kLLVMVersionNumber) && + WriteWord(wrapper_bc_offset_) && + WriteWord(wrapper_bc_size_) && + // 2 fixed fields defined by Android + WriteWord(android_header_version_) && + WriteWord(android_target_api_) && + // PNaClBitcode version + WriteWord(kPnaclBitcodeVersion) && + // Common variable-length fields + WriteVariableFields(); +} + +void BitcodeWrapperer::PrintWrapperHeader() { + if (error_) { + fprintf(stderr, "Error condition exists: the following" + "data may not be reliable\n"); + } + fprintf(stderr, "Wrapper magic:\t\t%x\n", kWrapperMagicNumber); + fprintf(stderr, "LLVM Bitcode version:\t%d\n", kLLVMVersionNumber); + fprintf(stderr, "Raw bitcode offset:\t%d\n", wrapper_bc_offset_); + fprintf(stderr, "Raw bitcode size:\t%d\n", wrapper_bc_size_); + fprintf(stderr, "Android header version:\t%d\n", android_header_version_); + fprintf(stderr, "Android target API:\t%d\n", android_target_api_); + fprintf(stderr, "PNaCl bitcode version:\t%d\n", kPnaclBitcodeVersion); + for (size_t i = 0; i < header_fields_.size(); i++) header_fields_[i].Print(); +} + +bool BitcodeWrapperer::GenerateWrappedBitcodeFile() { + if (!error_ && + WriteBitcodeWrapperHeader() && + Seek(infile_bc_offset_) && + BufferCopyInToOut(wrapper_bc_size_)) { + off_t dangling = wrapper_bc_size_ & 3; + if (dangling) { + return outfile_->Write((const uint8_t*) "\0\0\0\0", 4 - dangling); + } + return true; + } + return false; +} + +bool BitcodeWrapperer::GenerateRawBitcodeFile() { + return !error_ && Seek(infile_bc_offset_) && + BufferCopyInToOut(wrapper_bc_size_); +} diff --git a/lib/Wrap/file_wrapper_input.cpp b/lib/Wrap/file_wrapper_input.cpp new file mode 100644 index 0000000000..fc592e0246 --- /dev/null +++ b/lib/Wrap/file_wrapper_input.cpp @@ -0,0 +1,53 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. 
+ */ + +#include <sys/stat.h> +#include <stdlib.h> + +#include "llvm/Wrap/file_wrapper_input.h" + +FileWrapperInput::FileWrapperInput(const std::string& name) : + _name(name), _at_eof(false), _size_found(false), _size(0) { + _file = fopen(name.c_str(), "rb"); + if (NULL == _file) { + fprintf(stderr, "Unable to open: %s\n", name.c_str()); + exit(1); + } +} + +FileWrapperInput::~FileWrapperInput() { + fclose(_file); +} + +size_t FileWrapperInput::Read(uint8_t* buffer, size_t wanted) { + size_t found = fread((char*) buffer, 1, wanted, _file); + if (feof(_file) || ferror(_file)) { + _at_eof = true; + } + return found; +} + +bool FileWrapperInput::AtEof() { + return _at_eof; +} + +off_t FileWrapperInput::Size() { + if (_size_found) return _size; + struct stat st; + if (0 == stat(_name.c_str(), &st)) { + _size_found = true; + _size = st.st_size; + return _size; + } else { + fprintf(stderr, "Unable to compute file size: %s\n", _name.c_str()); + exit(1); + } + // NOT REACHABLE. + return 0; +} + +bool FileWrapperInput::Seek(uint32_t pos) { + return 0 == fseek(_file, (long) pos, SEEK_SET); +} diff --git a/lib/Wrap/file_wrapper_output.cpp b/lib/Wrap/file_wrapper_output.cpp new file mode 100644 index 0000000000..f9f126868d --- /dev/null +++ b/lib/Wrap/file_wrapper_output.cpp @@ -0,0 +1,37 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +#include "llvm/Wrap/file_wrapper_output.h" +#include <stdlib.h> + + +FileWrapperOutput::FileWrapperOutput(const std::string& name) + : _name(name) { + _file = fopen(name.c_str(), "wb"); + if (NULL == _file) { + fprintf(stderr, "Unable to open: %s\n", name.c_str()); + exit(1); + } +} + +FileWrapperOutput::~FileWrapperOutput() { + fclose(_file); +} + +bool FileWrapperOutput::Write(uint8_t byte) { + return EOF != fputc(byte, _file); +} + +bool FileWrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) { + if (!buffer) { + return false; + } + + if (buffer_size > 0) { + return buffer_size == fwrite(buffer, 1, buffer_size, _file); + } else { + return true; + } +} diff --git a/lib/Wrap/wrapper_output.cpp b/lib/Wrap/wrapper_output.cpp new file mode 100644 index 0000000000..493f29efa8 --- /dev/null +++ b/lib/Wrap/wrapper_output.cpp @@ -0,0 +1,9 @@ +#include "llvm/Wrap/wrapper_output.h" + +bool WrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) { + // Default implementation that uses the byte write routine. + for (size_t i = 0; i < buffer_size; ++i) { + if (!Write(buffer[i])) return false; + } + return true; +} diff --git a/projects/sample/autoconf/config.sub b/projects/sample/autoconf/config.sub index 9942491533..8f5793aef3 100755 --- a/projects/sample/autoconf/config.sub +++ b/projects/sample/autoconf/config.sub @@ -132,6 +132,10 @@ case $maybe_os in os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; + nacl) + os=-nacl + basic_machine=pnacl-unknown + ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] @@ -347,6 +351,8 @@ case $basic_machine in i*86 | x86_64) basic_machine=$basic_machine-pc ;; + pnacl-*) + ;; # Object if more than one company name word. 
*-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 @@ -1364,6 +1370,8 @@ case $os in ;; esac ;; + -nacl) + ;; -nto-qnx*) ;; -nto*) diff --git a/projects/sample/configure b/projects/sample/configure index cfbb6c6922..3baa1a7e16 100755 --- a/projects/sample/configure +++ b/projects/sample/configure @@ -3686,6 +3686,11 @@ else llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="GNU" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="NativeClient" + llvm_cv_platform_type="Unix" ;; *-*-solaris*) llvm_cv_link_all_option="-Wl,-z,allextract" llvm_cv_no_link_all_option="-Wl,-z,defaultextract" diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index cdfaf7f4c1..4800743a5f 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -mattr=-avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s ; RUN: llc < %s -mattr=+avx -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort -mtriple=x86_64-none-nacl | FileCheck %s --check-prefix=NACL64 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" @@ -301,6 +302,11 @@ define void @test23(i8* noalias sret %result) { ; CHECK: call ; CHECK: movq %rdi, %rax ; CHECK: ret +; NACL64: test23: +; NACL64: call +; NACL64: movl %edi, %eax +; NACL64: popq %rcx +; NACL64: nacljmp %ecx, %r15 } declare i8* @foo23() diff --git a/test/NaCl/ARM/lit.local.cfg b/test/NaCl/ARM/lit.local.cfg new file mode 100644 index 0000000000..1f10377867 --- /dev/null +++ b/test/NaCl/ARM/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.s'] + +targets = set(config.root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/NaCl/ARM/nacl-read-tp-intrinsic.ll b/test/NaCl/ARM/nacl-read-tp-intrinsic.ll new file mode 100644 index 0000000000..1050b902ed --- /dev/null +++ b/test/NaCl/ARM/nacl-read-tp-intrinsic.ll @@ -0,0 +1,20 @@ + +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=asm %s -o - \ +; RUN: | FileCheck -check-prefix=ARM %s + +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=asm -mtls-use-call %s -o - \ +; RUN: | FileCheck -check-prefix=ARM_IRT %s + + +declare i8* @llvm.nacl.read.tp() + +define i8* @get_thread_pointer() { + %tp = call i8* @llvm.nacl.read.tp() + ret i8* %tp +} + +; ARM: get_thread_pointer: +; ARM: ldr r0, [r9] + +; ARM_IRT: get_thread_pointer: +; ARM_IRT: bl __aeabi_read_tp diff --git a/test/NaCl/ARM/neon-vst1-sandboxing.ll b/test/NaCl/ARM/neon-vst1-sandboxing.ll new file mode 100644 index 0000000000..8fd580bb49 --- /dev/null +++ b/test/NaCl/ARM/neon-vst1-sandboxing.ll @@ -0,0 +1,116 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {{{d[0-9]+}}}, [r0, :64] + ret void +} + +define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { 
+ %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1f(float* %A, <2 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.64 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {{{d[0-9]+, d[0-9]+}}}, [r0, :64] + ret void +} + +define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128] + ret void +} + +define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <2 x i64>* %B + call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +;Check for a post-increment updating store. +define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind { + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+}}}, [r1]! 
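+; (Note: 3221225472 is 0xC0000000, so the expected "bic" clears the top two
+; bits of the store address; this is the masking that -sfi-store inserts to
+; keep data accesses inside the NaCl sandbox.)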
+ %tmp2 = getelementptr float* %A, i32 2 + store float* %tmp2, float** %ptr + ret void +} + +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind + diff --git a/test/NaCl/ARM/neon-vst2-sandboxing.ll b/test/NaCl/ARM/neon-vst2-sandboxing.ll new file mode 100644 index 0000000000..e87373c174 --- /dev/null +++ b/test/NaCl/ARM/neon-vst2-sandboxing.ll @@ -0,0 +1,95 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+}}}, [r0, :64] + ret void +} + +define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128] + ret void +} + +define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst2f(float* %A, <2 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :64] + ret void +} + +define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :128] + ret void +} + +define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :256] + ret void +} + +define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> 
%tmp1, <4 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +;Check for a post-increment updating store with register increment. +define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+}}}, [r1], r2 + %tmp2 = getelementptr i8* %A, i32 %inc + store i8* %tmp2, i8** %ptr + ret void +} + +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/NaCl/ARM/neon-vst3-sandboxing.ll b/test/NaCl/ARM/neon-vst3-sandboxing.ll new file mode 100644 index 0000000000..b496c0c592 --- /dev/null +++ b/test/NaCl/ARM/neon-vst3-sandboxing.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64] + ret void +} + +define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + ret void +} + +define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + ret void +} + +;Check for a post-increment updating store. +define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
+ %tmp2 = getelementptr i16* %A, i32 24 + store i16* %tmp2, i16** %ptr + ret void +} + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind + +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind diff --git a/test/NaCl/ARM/neon-vst4-sandboxing.ll b/test/NaCl/ARM/neon-vst4-sandboxing.ll new file mode 100644 index 0000000000..032f194231 --- /dev/null +++ b/test/NaCl/ARM/neon-vst4-sandboxing.ll @@ -0,0 +1,53 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64] + ret void +} + +define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128] + ret void +} + +define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :256] + ret void +} + +;Check for a post-increment updating store. +define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind { + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
+ %tmp2 = getelementptr float* %A, i32 16 + store float* %tmp2, float** %ptr + ret void +} + +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind + diff --git a/test/NaCl/ARM/neon-vstlane-sandboxing.ll b/test/NaCl/ARM/neon-vstlane-sandboxing.ll new file mode 100644 index 0000000000..5b4dc63a14 --- /dev/null +++ b/test/NaCl/ARM/neon-vstlane-sandboxing.ll @@ -0,0 +1,196 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[3]}, [r0] + ret void +} + +define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %B + %tmp2 = extractelement <4 x i16> %tmp1, i32 2 + store i16 %tmp2, i16* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[2]}, [r0, :16] + ret void +} + +define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %B + %tmp2 = extractelement <2 x i32> %tmp1, i32 1 + store i32 %tmp2, i32* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {d{{[0-9]+}}[1]}, [r0, :32] + ret void +} + +define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B + %tmp2 = extractelement <16 x i8> %tmp1, i32 9 + store i8 %tmp2, i8* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp1 = load <8 x i16>* %B + %tmp2 = extractelement <8 x i16> %tmp1, i32 5 + store i16 %tmp2, i16* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[1]}, [r0, :16] + ret void +} + +define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :16] + ret void +} + +define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :32] + ret void +} + +define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void 
@llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0, :64] + ret void +} + +define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :32] + ret void +} + +define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :128] + ret void +} + +define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0, :64] + ret void +} + +define void 
@vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0] + ret void +} + +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind + +;Check for a post-increment updating store with register increment. 
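+; (The CHECK lines below expect the sandboxing bic to target the same base
+; register that the updating "[r1], r2" addressing form writes back.)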
+define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r1], r2 + %tmp2 = getelementptr i16* %A, i32 %inc + store i16* %tmp2, i16** %ptr + ret void +} diff --git a/test/NaCl/ARM/simple-load-store_sandboxing1.ll b/test/NaCl/ARM/simple-load-store_sandboxing1.ll new file mode 100644 index 0000000000..417bb1f389 --- /dev/null +++ b/test/NaCl/ARM/simple-load-store_sandboxing1.ll @@ -0,0 +1,27 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @foo(i32* %input, i32* %output) nounwind { +entry: + %input.addr = alloca i32*, align 4 + %output.addr = alloca i32*, align 4 + store i32* %input, i32** %input.addr, align 4 + store i32* %output, i32** %output.addr, align 4 + %0 = load i32** %input.addr, align 4 + %1 = load i32* %0, align 4 + +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: ldr r0, [r0] + + %add = add nsw i32 %1, 4 + %2 = load i32** %output.addr, align 4 + store i32 %add, i32* %2, align 4 + +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: str r0, [r1] + + ret void +} + + + diff --git a/test/NaCl/ARM/sp-arithmetic-sandboxing1.ll b/test/NaCl/ARM/sp-arithmetic-sandboxing1.ll new file mode 100644 index 0000000000..a8b3cf1c16 --- /dev/null +++ b/test/NaCl/ARM/sp-arithmetic-sandboxing1.ll @@ -0,0 +1,28 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -sfi-load -sfi-stack -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @foo(i32* %input, i32* %output) nounwind { +entry: + %input.addr = alloca i32*, align 4 + %output.addr = alloca i32*, align 4 + %temp = alloca i32, align 4 + +; CHECK: sub sp, sp +; CHECK-NEXT: bic sp, sp, #3221225472 + + store i32* %input, i32** %input.addr, align 4 + store i32* %output, i32** %output.addr, align 4 + %0 = load i32** %input.addr, align 4 + %arrayidx = getelementptr inbounds i32* %0, i32 1 + %1 = load i32* %arrayidx, align 4 + store i32 %1, i32* %temp, align 4 + %2 = load i32* %temp, align 4 + %3 = load i32** %output.addr, align 4 + %arrayidx1 = getelementptr inbounds i32* %3, i32 0 + store i32 %2, i32* %arrayidx1, align 4 + +; CHECK: add sp, sp +; CHECK-NEXT: bic sp, sp, #3221225472 + + ret void +} diff --git a/test/NaCl/ARM/vstr-sandboxing1.ll b/test/NaCl/ARM/vstr-sandboxing1.ll new file mode 100644 index 0000000000..6646cbc717 --- /dev/null +++ b/test/NaCl/ARM/vstr-sandboxing1.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @test_vstr_sandbox(<8 x i8>* %ptr) nounwind { + %1 = insertelement <8 x i8> undef, i8 -128, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + store <8 x i8> %2, <8 x i8>* %ptr, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vstr {{[0-9a-z]+}}, [r0] + + ret void +} + diff --git a/test/NaCl/X86/lit.local.cfg b/test/NaCl/X86/lit.local.cfg new file mode 100644 index 0000000000..56bf008595 --- /dev/null +++ b/test/NaCl/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.s'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff 
--git a/test/NaCl/X86/nacl-read-tp-intrinsic.ll b/test/NaCl/X86/nacl-read-tp-intrinsic.ll new file mode 100644 index 0000000000..2779f1b1e1 --- /dev/null +++ b/test/NaCl/X86/nacl-read-tp-intrinsic.ll @@ -0,0 +1,44 @@ + +; RUN: llc -mtriple=i386-unknown-nacl -filetype=asm %s -o - \ +; RUN: | FileCheck -check-prefix=X32 %s + +; RUN: llc -mtriple=i386-unknown-nacl -filetype=asm -mtls-use-call %s -o - \ +; RUN: | FileCheck -check-prefix=USE_CALL %s + +; RUN: llc -mtriple=x86_64-unknown-nacl -filetype=asm %s -o - \ +; RUN: | FileCheck -check-prefix=USE_CALL %s + +; "-mtls-use-call" should not make any difference on x86-64. +; RUN: llc -mtriple=x86_64-unknown-nacl -filetype=asm -mtls-use-call %s -o - \ +; RUN: | FileCheck -check-prefix=USE_CALL %s + + +declare i8* @llvm.nacl.read.tp() + +define i8* @get_thread_pointer() { + %tp = call i8* @llvm.nacl.read.tp() + ret i8* %tp +} + +; X32: get_thread_pointer: +; X32: movl %gs:0, %eax + +; USE_CALL: get_thread_pointer: +; USE_CALL: naclcall __nacl_read_tp + + +; Make sure that we do not generate: +; movl $1000, %eax +; addl %gs:0, %eax +; The x86-32 NaCl validator only accepts %gs with "mov", not with +; "add". Note that we had to use a large immediate to trigger the bug +; and generate the code above. +define i8* @get_thread_pointer_add() { + %tp = call i8* @llvm.nacl.read.tp() + %result = getelementptr i8* %tp, i32 1000 + ret i8* %result +} + +; X32: get_thread_pointer_add: +; X32: movl %gs:0, %eax +; X32: addl $1000, %eax diff --git a/test/Transforms/NaCl/expand-ctors-empty.ll b/test/Transforms/NaCl/expand-ctors-empty.ll new file mode 100644 index 0000000000..4368270765 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-empty.ll @@ -0,0 +1,11 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. +; RUN: opt < %s -nacl-expand-ctors -S | not grep __init_array_end +; RUN: opt < %s -nacl-expand-ctors -S | not grep __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is not present, it is treated as if it is an +; empty array, and __{init,fini}_array_start are defined anyway. + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-zeroinit.ll b/test/Transforms/NaCl/expand-ctors-zeroinit.ll new file mode 100644 index 0000000000..d02741f0b5 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-zeroinit.ll @@ -0,0 +1,16 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. +; RUN: opt < %s -nacl-expand-ctors -S | not grep __init_array_end +; RUN: opt < %s -nacl-expand-ctors -S | not grep __fini_array_end + +; We expect this symbol to be removed: +; RUN: opt < %s -nacl-expand-ctors -S | not grep llvm.global_ctors + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is zeroinitializer, it should be treated the +; same as an empty array. 
+ +@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors.ll b/test/Transforms/NaCl/expand-ctors.ll new file mode 100644 index 0000000000..7f202618e7 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors.ll @@ -0,0 +1,36 @@ +; We expect these symbol names to be removed: +; RUN: opt < %s -nacl-expand-ctors -S | not grep llvm.global_ctors +; RUN: opt < %s -nacl-expand-ctors -S | not grep __init_array_end +; RUN: opt < %s -nacl-expand-ctors -S | not grep __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +@llvm.global_ctors = appending global [3 x { i32, void ()* }] + [{ i32, void ()* } { i32 300, void ()* @init_func_A }, + { i32, void ()* } { i32 100, void ()* @init_func_B }, + { i32, void ()* } { i32 200, void ()* @init_func_C }] + +@__init_array_start = extern_weak global [0 x void ()*] +@__init_array_end = extern_weak global [0 x void ()*] + +; CHECK: @__init_array_start = internal constant [3 x void ()*] [void ()* @init_func_B, void ()* @init_func_C, void ()* @init_func_A] +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer + +define void @init_func_A() { ret void } +define void @init_func_B() { ret void } +define void @init_func_C() { ret void } + +define [0 x void ()*]* @get_array_start() { + ret [0 x void ()*]* @__init_array_start; +} +; CHECK: @get_array_start() +; CHECK: ret {{.*}} @__init_array_start + +define [0 x void ()*]* @get_array_end() { + ret [0 x void ()*]* @__init_array_end; +} + +; @get_array_end() is converted to use a GetElementPtr that returns +; the end of the generated array: +; CHECK: @get_array_end() +; CHECK: ret {{.*}} bitcast ([3 x void ()*]* getelementptr inbounds ([3 x void ()*]* @__init_array_start, i32 1) diff --git a/tools/Makefile b/tools/Makefile index a29e49f0a1..17e8380677 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -34,7 +34,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \ bugpoint llvm-bcanalyzer \ llvm-diff macho-dump llvm-objdump llvm-readobj \ llvm-rtdyld llvm-dwarfdump llvm-cov \ - llvm-size llvm-stress llvm-mcmarkup + llvm-size llvm-stress llvm-mcmarkup bc-wrap pso-stub # Let users override the set of tools to build from the command line. ifdef ONLY_TOOLS diff --git a/tools/bc-wrap/LLVMBuild.txt b/tools/bc-wrap/LLVMBuild.txt new file mode 100644 index 0000000000..b515fc04b9 --- /dev/null +++ b/tools/bc-wrap/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llc/LLVMBuild.txt --------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
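expand-ctors.ll above expects init_func_B, init_func_C, init_func_A in that order because entries are emitted by ascending constructor priority (100, 200, 300). A small C++ sketch of that ordering rule; the names come from the test, the sort itself is illustrative.

#include <algorithm>
#include <cstdio>
#include <vector>

struct Ctor {
  int Priority;
  const char *Name;
};

static bool ByPriority(const Ctor &L, const Ctor &R) {
  return L.Priority < R.Priority;
}

int main() {
  Ctor A = {300, "init_func_A"};
  Ctor B = {100, "init_func_B"};
  Ctor C = {200, "init_func_C"};
  std::vector<Ctor> Ctors;
  Ctors.push_back(A);
  Ctors.push_back(B);
  Ctors.push_back(C);
  std::stable_sort(Ctors.begin(), Ctors.end(), ByPriority);
  for (size_t i = 0; i < Ctors.size(); ++i)
    std::printf("%s\n", Ctors[i].Name);  // init_func_B, init_func_C, init_func_A
  return 0;
}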
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = bc-wrap +parent = Tools +required_libraries = Wrap all-targets diff --git a/tools/bc-wrap/Makefile b/tools/bc-wrap/Makefile new file mode 100644 index 0000000000..dccff2ecde --- /dev/null +++ b/tools/bc-wrap/Makefile @@ -0,0 +1,20 @@ +#===- tools/bc-wrap/Makefile -----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = bc-wrap + +# Include this here so we can get the configuration of the targets +# that have been configured for construction. We have to do this +# early so we can set up LINK_COMPONENTS before including Makefile.rules +include $(LEVEL)/Makefile.config + +LINK_COMPONENTS := $(TARGETS_TO_BUILD) Wrap + +include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/tools/bc-wrap/bc_wrap.cpp b/tools/bc-wrap/bc_wrap.cpp new file mode 100644 index 0000000000..5311f714ee --- /dev/null +++ b/tools/bc-wrap/bc_wrap.cpp @@ -0,0 +1,123 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ +/* + * Utility to wrap a .bc file, using LLVM standard+ custom headers. + */ + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Wrap/bitcode_wrapperer.h" +#include "llvm/Wrap/file_wrapper_input.h" +#include "llvm/Wrap/file_wrapper_output.h" + +#include <ctype.h> +#include <string.h> + +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input file>"), cl::Required); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("<output file>")); + +static cl::opt<bool> UnwrapFlag("u", + cl::desc("unwrap rather than wrap the file"), + cl::init(false)); + +static cl::opt<bool> VerboseFlag("v", + cl::desc("print verbose header information"), + cl::init(false)); + +static cl::opt<bool> DryRunFlag("n", + cl::desc("Dry run (implies -v)"), + cl::init(false)); + +// Accept the hash on the command line to avoid having to include sha1 +// library with the LLVM code +static cl::opt<std::string> BitcodeHash("hash", + cl::desc("Hash of bitcode (ignored if -u is given)")); + +const int kMaxBinaryHashLen = 32; + +// Convert ASCII hex hash to binary hash. return buffer and length. +// The caller must free the returned buffer. 
+static uint8_t* ParseBitcodeHash(int* len) { + if (BitcodeHash.size() > kMaxBinaryHashLen * 2 || + BitcodeHash.size() % 2) return NULL; + *len = BitcodeHash.size() / 2; + uint8_t* buf = new uint8_t[*len]; + const char* arg = BitcodeHash.data(); + for (size_t i = 0; i < BitcodeHash.size() / 2; i++) { + unsigned int r; // glibc has %hhx but it's nonstandard + if (!isxdigit(*(arg + 2 * i + 1)) || // sscanf ignores trailing junk + !sscanf(arg + 2 * i, "%2x", &r) || + r > std::numeric_limits<uint8_t>::max()) { + delete [] buf; + return NULL; + } + buf[i] = static_cast<uint8_t>(r); + } + return buf; +} + +int main(const int argc, const char* argv[]) { + bool success = true; + cl::ParseCommandLineOptions(argc, argv, "bitcode wrapper/unwrapper\n"); + if (OutputFilename == "") { + // Default to input file = output file. The cl lib doesn't seem to + // directly support initializing one opt from another. + OutputFilename = InputFilename; + } + if (DryRunFlag) VerboseFlag = true; + sys::fs::file_status outfile_status; + std::string outfile_temp; + outfile_temp = std::string(OutputFilename) + ".temp"; + if (UnwrapFlag) { + FileWrapperInput inbc(InputFilename); + FileWrapperOutput outbc(outfile_temp); + BitcodeWrapperer wrapperer(&inbc, &outbc); + if (wrapperer.IsInputBitcodeWrapper()) { + if (VerboseFlag) { + fprintf(stderr, "Headers read from infile:\n"); + wrapperer.PrintWrapperHeader(); + } + if (DryRunFlag) + return 0; + success = wrapperer.GenerateRawBitcodeFile(); + } + } else { + FileWrapperInput inbc(InputFilename); + FileWrapperOutput outbc(outfile_temp); + BitcodeWrapperer wrapperer(&inbc, &outbc); + if (BitcodeHash.size()) { + // SHA-2 hash is 256 bit + int hash_len; + uint8_t* buf = ParseBitcodeHash(&hash_len); + if (!buf) { + fprintf(stderr, "Bitcode hash must be a hex string <= 64 chars.\n"); + exit(1); + } + BCHeaderField hash(BCHeaderField::kBitcodeHash, hash_len, buf); + wrapperer.AddHeaderField(&hash); + } + if (VerboseFlag) { + fprintf(stderr, "Headers generated:\n"); + wrapperer.PrintWrapperHeader(); + } + if (DryRunFlag) + return 0; + success = wrapperer.GenerateWrappedBitcodeFile(); + } + error_code ec; + if ((ec = sys::fs::rename(outfile_temp, OutputFilename))) { + fprintf(stderr, "Could not rename temporary: %s\n", ec.message().c_str()); + success = false; + } + if (success) return 0; + fprintf(stderr, "error: Unable to generate a proper %s bitcode file!\n", + (UnwrapFlag ? "unwrapped" : "wrapped")); + return 1; +} diff --git a/tools/gold/Makefile b/tools/gold/Makefile index 496e31cc39..31812e1f8c 100644 --- a/tools/gold/Makefile +++ b/tools/gold/Makefile @@ -14,6 +14,10 @@ LINK_LIBS_IN_SHARED := 1 SHARED_LIBRARY := 1 LOADABLE_MODULE := 1 +# @LOCALMOD: this forces to appear -lLTO *after* the object file +# on the linkline. This is necessary for linking on ubuntu precise. 
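The -hash option above accepts an even-length hex string of at most 64 characters, i.e. up to 32 binary bytes such as a SHA-256. A simplified, stand-alone sketch of the same hex-to-bytes conversion (the tool itself pairs sscanf with an isxdigit check to reject trailing junk); the helper name is hypothetical.

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>

// Convert "deadbeef..." to raw bytes; reject odd lengths, overlong input,
// and non-hex characters, mirroring the checks in ParseBitcodeHash above.
static bool HexToBytes(const std::string &Hex, std::vector<unsigned char> *Out) {
  if (Hex.size() % 2 != 0 || Hex.size() > 64)
    return false;
  for (size_t i = 0; i < Hex.size(); i += 2) {
    if (!isxdigit((unsigned char)Hex[i]) || !isxdigit((unsigned char)Hex[i + 1]))
      return false;
    char Byte[3] = { Hex[i], Hex[i + 1], '\0' };
    Out->push_back((unsigned char)std::strtoul(Byte, 0, 16));
  }
  return true;
}

int main() {
  std::vector<unsigned char> Bytes;
  if (HexToBytes("deadbeef", &Bytes))
    std::printf("%u bytes, first = 0x%02x\n", (unsigned)Bytes.size(),
                (unsigned)Bytes[0]);  // 4 bytes, first = 0xde
  return 0;
}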
+# Otherwise LLVMgold.so will not have a dt_needed entry for LTO +EXTRA_LIBS := -lLTO EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports # Include this here so we can get the configuration of the targets diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index b0a0dd2a40..1c3a01b1e8 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -55,6 +55,25 @@ namespace { ld_plugin_set_extra_library_path set_extra_library_path = NULL; ld_plugin_get_view get_view = NULL; ld_plugin_message message = discard_message; + // @LOCALMOD-BEGIN + // REL, DYN, or EXEC + ld_plugin_output_file_type linker_output; + + // Callback for getting link soname from gold + ld_plugin_get_output_soname get_output_soname = NULL; + + // Callback for getting needed libraries from gold + ld_plugin_get_needed get_needed = NULL; + + // Callback for getting number of needed library from gold + ld_plugin_get_num_needed get_num_needed = NULL; + + // Callback for getting the number of --wrap'd symbols. + ld_plugin_get_num_wrapped get_num_wrapped = NULL; + + // Callback for getting the name of a wrapped symbol. + ld_plugin_get_wrapped get_wrapped = NULL; + // @LOCALMOD-END int api_version = 0; int gold_version = 0; @@ -62,11 +81,17 @@ namespace { struct claimed_file { void *handle; std::vector<ld_plugin_symbol> syms; + bool is_linked_in; // @LOCALMOD }; lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; std::string output_name = ""; std::list<claimed_file> Modules; + + // @LOCALMOD-BEGIN + std::vector<std::string> DepLibs; + // @LOCALMOD-END + std::vector<sys::Path> Cleanup; lto_code_gen_t code_gen = NULL; } @@ -74,6 +99,7 @@ namespace { namespace options { enum generate_bc { BC_NO, BC_ALSO, BC_ONLY }; static bool generate_api_file = false; + static bool gather_then_link = true; // @LOCALMOD static generate_bc generate_bc_file = BC_NO; static std::string bc_path; static std::string obj_path; @@ -103,6 +129,10 @@ namespace options { triple = opt.substr(strlen("mtriple=")); } else if (opt.startswith("obj-path=")) { obj_path = opt.substr(strlen("obj-path=")); + // @LOCALMOD-BEGIN + } else if (opt == "no-gather-then-link") { + gather_then_link = false; + // @LOCALMOD-END } else if (opt == "emit-llvm") { generate_bc_file = BC_ONLY; } else if (opt == "also-emit-llvm") { @@ -123,6 +153,18 @@ namespace options { } } +// @LOCALMOD-BEGIN +static const char *get_basename(const char *path) { + if (path == NULL) + return NULL; + const char *slash = strrchr(path, '/'); + if (slash) + return slash + 1; + + return path; +} +// @LOCALMOD-END + static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, int *claimed); static ld_plugin_status all_symbols_read_hook(void); @@ -150,6 +192,10 @@ ld_plugin_status onload(ld_plugin_tv *tv) { output_name = tv->tv_u.tv_string; break; case LDPT_LINKER_OUTPUT: + // @LOCALMOD-BEGIN + linker_output = + static_cast<ld_plugin_output_file_type>(tv->tv_u.tv_val); + // @LOCALMOD-END switch (tv->tv_u.tv_val) { case LDPO_REL: // .o case LDPO_DYN: // .so @@ -213,7 +259,23 @@ ld_plugin_status onload(ld_plugin_tv *tv) { break; case LDPT_GET_VIEW: get_view = tv->tv_u.tv_get_view; + // @LOCALMOD-BEGIN + case LDPT_GET_OUTPUT_SONAME: + get_output_soname = tv->tv_u.tv_get_output_soname; break; + case LDPT_GET_NEEDED: + get_needed = tv->tv_u.tv_get_needed; + break; + case LDPT_GET_NUM_NEEDED: + get_num_needed = tv->tv_u.tv_get_num_needed; + break; + case LDPT_GET_WRAPPED: + get_wrapped = tv->tv_u.tv_get_wrapped; + break; + case LDPT_GET_NUM_WRAPPED: + 
get_num_wrapped = tv->tv_u.tv_get_num_wrapped; + break; + // @LOCALMOD-END case LDPT_MESSAGE: message = tv->tv_u.tv_message; break; @@ -231,6 +293,24 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } + // @LOCALMOD-BEGIN + // Parse extra command-line options + // Although lto_codegen provides a way to parse command-line arguments, + // we need the arguments to be parsed and applied before LTOModules are + // even created. In particular, this is needed because the + // "-add-nacl-read-tp-dependency" flag affects how modules are created. + if (!options::extra.empty()) { + for (std::vector<std::string>::iterator it = options::extra.begin(); + it != options::extra.end(); ++it) { + lto_add_command_line_option((*it).c_str()); + } + lto_parse_command_line_options(); + // We clear the options so that they don't get parsed again in + // lto_codegen_debug_options. + options::extra.clear(); + } + // @LOCALMOD-END + return LDPS_OK; } @@ -297,7 +377,21 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, ld_plugin_symbol &sym = cf.syms.back(); sym.name = const_cast<char *>(lto_module_get_symbol_name(M, i)); sym.name = strdup(sym.name); + // @LOCALMOD-BEGIN + // Localmods have disabled the use of the 'version' field for passing + // version information to Gold. Instead, the version is now transmitted as + // part of the 'name' field, which has the form "sym@VER" or "sym@@VER". + // This is nicer because it communicates one extra bit of information (@@ + // marks the default version), and allows us to access the real symbol + // name in all_symbols_read. + + // These fields are set by Gold to communicate the updated version info + // to the plugin. They are used in all_symbols_read_hook(). + // Initialize them for predictability. sym.version = NULL; + sym.is_default = false; + sym.dynfile = NULL; + // @LOCALMOD-END int scope = attrs & LTO_SYMBOL_SCOPE_MASK; switch (scope) { @@ -346,18 +440,45 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } cf.syms.reserve(cf.syms.size()); + // @LOCALMOD-BEGIN + bool is_shared = + (lto_module_get_output_format(M) == LTO_OUTPUT_FORMAT_SHARED); + const char* soname = lto_module_get_soname(M); + if (soname[0] == '\0') + soname = NULL; + // @LOCALMOD-END if (!cf.syms.empty()) { - if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0]) != LDPS_OK) { + if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0], + is_shared, soname) != LDPS_OK) { // @LOCALMOD (*message)(LDPL_ERROR, "Unable to add symbols!"); return LDPS_ERR; } } - if (code_gen) - lto_codegen_add_module(code_gen, M); + // @LOCALMOD-BEGIN + // Do not merge the module if it's a PSO. + // If the PSO's soname is set, add it to DepLibs. + cf.is_linked_in = false; + if (code_gen) { + if (is_shared) { + if (soname && strlen(soname) > 0) { + DepLibs.push_back(soname); + } + } else { + if (options::gather_then_link) { + lto_codegen_gather_module_for_link(code_gen, M); + } else { + lto_codegen_add_module(code_gen, M); + } + cf.is_linked_in = true; + } + } - lto_module_dispose(M); + // With gather_then_link, the modules are disposed when linking. 
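The localmod above moves version information into the symbol name itself: "sym@VER" marks a non-default version and "sym@@VER" the default one. A small stand-alone parser showing the convention; the plugin receives these strings from gold, and the example inputs here are hypothetical.

#include <cstdio>
#include <string>

static void SplitVersionedName(const std::string &Full, std::string *Name,
                               std::string *Version, bool *IsDefault) {
  size_t At = Full.find('@');
  if (At == std::string::npos) {
    *Name = Full;
    Version->clear();
    *IsDefault = false;
    return;
  }
  *Name = Full.substr(0, At);
  *IsDefault = (At + 1 < Full.size() && Full[At + 1] == '@');
  *Version = Full.substr(At + (*IsDefault ? 2 : 1));
}

int main() {
  std::string N, V;
  bool D;
  SplitVersionedName("foo@@GLIBC_2.0", &N, &V, &D);
  std::printf("%s %s default=%d\n", N.c_str(), V.c_str(), D);  // foo GLIBC_2.0 1
  SplitVersionedName("bar@V1", &N, &V, &D);
  std::printf("%s %s default=%d\n", N.c_str(), V.c_str(), D);  // bar V1 0
  return 0;
}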
+ if (!options::gather_then_link) + lto_module_dispose(M); + // @LOCALMOD-END return LDPS_OK; } @@ -370,6 +491,12 @@ static ld_plugin_status all_symbols_read_hook(void) { std::ofstream api_file; assert(code_gen); + // @LOCALMOD-BEGIN + if (options::gather_then_link) { + lto_codegen_link_gathered_modules_and_dispose(code_gen); + } + // @LOCALMOD-END + if (options::generate_api_file) { api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc); if (!api_file.is_open()) { @@ -384,12 +511,45 @@ static ld_plugin_status all_symbols_read_hook(void) { continue; (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]); for (unsigned i = 0, e = I->syms.size(); i != e; i++) { + // @LOCALMOD-BEGIN + // Don't process the symbols inside a dynamic object. + if (!I->is_linked_in) + continue; + // @LOCALMOD-END + if (I->syms[i].resolution == LDPR_PREVAILING_DEF) { + // @LOCALMOD-BEGIN + // Set the symbol version in the module. + if (linker_output != LDPO_REL && I->syms[i].version) { + // NOTE: This may change the name of the symbol, so it must happen + // before the call to lto_codegen_add_must_preserve_symbols() below. + I->syms[i].name = const_cast<char *>( + lto_codegen_set_symbol_def_version(code_gen, I->syms[i].name, + I->syms[i].version, + I->syms[i].is_default)); + } lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); + // @LOCALMOD-END if (options::generate_api_file) api_file << I->syms[i].name << "\n"; } + // @LOCALMOD-BEGIN + else if (linker_output != LDPO_REL && + (I->syms[i].resolution == LDPR_RESOLVED_DYN || + I->syms[i].resolution == LDPR_UNDEF)) { + // This symbol is provided by an external object. + // Set the version and source dynamic file for it. + const char *ver = I->syms[i].version; + const char *dynfile = I->syms[i].dynfile; + dynfile = get_basename(dynfile); + // NOTE: This may change the name of the symbol. + I->syms[i].name = const_cast<char *>( + lto_codegen_set_symbol_needed(code_gen, I->syms[i].name, + ver ? ver : "", + dynfile ? dynfile : "")); + } + // @LOCALMOD-END } } @@ -401,6 +561,11 @@ static ld_plugin_status all_symbols_read_hook(void) { if (!options::mcpu.empty()) lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); + // @LOCALMOD-BEGIN (COMMENT) + // "extra" will always be empty below, because we process the extra + // options earlier, at the end of onload(). + // @LOCALMOD-END + // Pass through extra options to the code generator. if (!options::extra.empty()) { for (std::vector<std::string>::iterator it = options::extra.begin(); @@ -409,6 +574,57 @@ static ld_plugin_status all_symbols_read_hook(void) { } } + // @LOCALMOD-BEGIN + // Store the linker output format into the bitcode. + lto_output_format format; + switch (linker_output) { + case LDPO_REL: + format = LTO_OUTPUT_FORMAT_OBJECT; + break; + case LDPO_DYN: + format = LTO_OUTPUT_FORMAT_SHARED; + break; + case LDPO_EXEC: + format = LTO_OUTPUT_FORMAT_EXEC; + break; + default: + (*message)(LDPL_FATAL, "Unknown linker output format (gold-plugin)"); + abort(); + break; + } + lto_codegen_set_merged_module_output_format(code_gen, format); + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // For -shared linking, store the soname into the bitcode. + if (linker_output == LDPO_DYN) { + const char *soname = (*get_output_soname)(); + lto_codegen_set_merged_module_soname(code_gen, soname); + } + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // Add the needed libraries to the bitcode. 
+ unsigned int num_needed = (*get_num_needed)(); + for (unsigned i=0; i < num_needed; ++i) { + const char *soname = (*get_needed)(i); + soname = get_basename(soname); + lto_codegen_add_merged_module_library_dep(code_gen, soname); + } + for (std::vector<std::string>::iterator I = DepLibs.begin(), + E = DepLibs.end(); I != E; ++I) { + lto_codegen_add_merged_module_library_dep(code_gen, I->c_str()); + } + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // Perform symbol wrapping. + unsigned int num_wrapped = (*get_num_wrapped)(); + for (unsigned i=0; i < num_wrapped; ++i) { + const char *sym = (*get_wrapped)(i); + lto_codegen_wrap_symbol_in_merged_module(code_gen, sym); + } + // @LOCALMOD-END if (options::generate_bc_file != options::BC_NO) { std::string path; if (options::generate_bc_file == options::BC_ONLY) diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt index 683f29862d..9c695bcdea 100644 --- a/tools/llc/CMakeLists.txt +++ b/tools/llc/CMakeLists.txt @@ -1,5 +1,11 @@ set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser) add_llvm_tool(llc +# LOCALMOD BEGIN +# This file provides wrappers to lseek(2), read(2), etc. + nacl_file.cpp + StubMaker.cpp + TextStubWriter.cpp +# LOCALMOD END llc.cpp ) diff --git a/tools/llc/ELFStub.h b/tools/llc/ELFStub.h new file mode 100644 index 0000000000..a79fecff0f --- /dev/null +++ b/tools/llc/ELFStub.h @@ -0,0 +1,55 @@ +// This file describes a simple high-level representation of an ELF stub. + +#ifndef __ELF_STUB_H +#define __ELF_STUB_H + +#include <llvm/Support/ELF.h> +#include <llvm/ADT/StringMap.h> +#include <string> +#include <vector> + +namespace llvm { + +struct SymbolStub; +struct VersionDefinition; + +using ELF::Elf32_Half; + +struct ELFStub { + Elf32_Half Machine; + std::string SOName; + std::vector<SymbolStub> Symbols; + std::vector<VersionDefinition> VerDefs; + + // These are used for constructing the version definitions. + // They are not directly emitted to the ELF stub. + StringMap<Elf32_Half> IndexMap; // Maps version name to version index. + Elf32_Half NextIndex; // Next available version index +}; + + +// Dynamic symbol entries +struct SymbolStub { + // Symbol Table info. + std::string Name; + unsigned char Type; // STT_* + unsigned char Binding; // STB_* + unsigned char Visibility; // STV_* + ELF::Elf32_Word Size; // Guess for st_size. + // st_value, etc. are stubbed out. + + // Version info matching each of the symbols. + Elf32_Half VersionIndex; // vd_ndx + bool IsDefault; +}; + +// Versions defined in this module +struct VersionDefinition { + Elf32_Half Index; // vd_ndx + bool IsWeak; // TODO(pdox): Implement this (for vd_flags) + std::string Name; // for vda_name, etc. + std::vector<std::string> Parents; // TODO(pdox): Implement this +}; + +} +#endif diff --git a/tools/llc/SRPCStreamer.cpp b/tools/llc/SRPCStreamer.cpp new file mode 100644 index 0000000000..3eaa7c17c6 --- /dev/null +++ b/tools/llc/SRPCStreamer.cpp @@ -0,0 +1,116 @@ +//===-- SRPCStreamer.cpp - Stream bitcode over SRPC ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
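The symbol-wrapping loop above forwards gold's --wrap list into the merged module. For reference, --wrap=sym makes references to sym resolve to __wrap_sym, while __real_sym refers back to the original definition. A trivial sketch of just that naming rule, not of the plugin API:

#include <cstdio>
#include <string>

static std::string WrapName(const std::string &Sym) { return "__wrap_" + Sym; }
static std::string RealName(const std::string &Sym) { return "__real_" + Sym; }

int main() {
  // With --wrap=malloc, calls to malloc land in __wrap_malloc, which can
  // call __real_malloc to reach the original allocator.
  std::printf("%s / %s\n", WrapName("malloc").c_str(), RealName("malloc").c_str());
  return 0;
}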
+// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#if defined(__native_client__) && defined(NACL_SRPC) +#define DEBUG_TYPE "bitcode-stream" +#include "SRPCStreamer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <errno.h> + +using llvm::dbgs; + +size_t QueueStreamer::GetBytes(unsigned char *buf, size_t len) { + pthread_mutex_lock(&Mutex); + while (!Done && queueSize() < len) { + DEBUG(dbgs() << "QueueStreamer::GetBytes len " << len << " size " << + queueSize() <<" << waiting\n"); + pthread_cond_wait(&Cond, &Mutex); + } + if (Done && queueSize() < len) len = queueSize(); + queueGet(buf, len); + pthread_mutex_unlock(&Mutex); + return len; +} + +size_t QueueStreamer::PutBytes(unsigned char *buf, size_t len) { + pthread_mutex_lock(&Mutex); + queuePut(buf, len); + pthread_cond_signal(&Cond); + pthread_mutex_unlock(&Mutex); + return len; +} + +void QueueStreamer::SetDone() { + // Still need the lock to avoid signaling between the check and + // the wait in GetBytes. + pthread_mutex_lock(&Mutex); + Done = true; + pthread_cond_signal(&Cond); + pthread_mutex_unlock(&Mutex); +} + +// Called with Mutex held to protect Cons, Prod, and Bytes +void QueueStreamer::queuePut(unsigned char *buf, size_t len) { + while (capacityRemaining() < len) { + int leftover = Bytes.size() - Cons; + DEBUG(dbgs() << "resizing " << leftover << " " << Prod << " " << + Cons << "\n"); + Bytes.resize(Bytes.size() * 2); + if (Cons > Prod) { + // There are unread bytes left between Cons and the previous end of the + // buffer. Move them to the new end of the buffer. + memmove(&Bytes[Bytes.size() - leftover], &Bytes[Cons], leftover); + Cons = Bytes.size() - leftover; + } + } + size_t EndSpace = std::min(len, Bytes.size() - Prod); + DEBUG(dbgs() << "put, len " << len << " Endspace " << EndSpace << " p " << + Prod << " c " << Cons << "\n"); + // Copy up to the end of the buffer + memcpy(&Bytes[Prod], buf, EndSpace); + // Wrap around if necessary + memcpy(&Bytes[0], buf + EndSpace, len - EndSpace); + Prod = (Prod + len) % Bytes.size(); +} + +// Called with Mutex held to protect Cons, Prod, and Bytes +void QueueStreamer::queueGet(unsigned char *buf, size_t len) { + assert(len <= queueSize()); + size_t EndSpace = std::min(len, Bytes.size() - Cons); + DEBUG(dbgs() << "get, len " << len << " Endspace " << EndSpace << " p " << + Prod << " c " << Cons << "\n"); + // Copy up to the end of the buffer + memcpy(buf, &Bytes[Cons], EndSpace); + // Wrap around if necessary + memcpy(buf + EndSpace, &Bytes[0], len - EndSpace); + Cons = (Cons + len) % Bytes.size(); +} + +llvm::DataStreamer *SRPCStreamer::init(void *(*Callback)(void *), void *arg, + std::string *ErrMsg) { + int err = pthread_create(&CompileThread, NULL, Callback, arg); + if (err) { + if (ErrMsg) *ErrMsg = std::string(strerror(errno)); + return NULL; + } + return &Q; +} + +size_t SRPCStreamer::gotChunk(unsigned char *bytes, size_t len) { + if (Error) return 0; + return Q.PutBytes(bytes, len); +} + +int SRPCStreamer::streamEnd(std::string *ErrMsg) { + Q.SetDone(); + int err = pthread_join(CompileThread, NULL); + if (err) { + if (ErrMsg) *ErrMsg = std::string(strerror(errno)); + return err; + } + if (Error && ErrMsg) *ErrMsg = std::string("compile failed."); + return Error; +} + +#endif diff --git a/tools/llc/SRPCStreamer.h b/tools/llc/SRPCStreamer.h new file mode 100644 index 0000000000..a326d9276d --- /dev/null +++ 
b/tools/llc/SRPCStreamer.h @@ -0,0 +1,93 @@ +//===-- SRPCStreamer.cpp - Stream bitcode over SRPC ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#ifndef SRPCSTREAMER_H +#define SRPCSTREAMER_H + +#include <pthread.h> +#include <cassert> +#include <cstdio> +#include <cstring> +#include <vector> +#include "llvm/Support/DataStream.h" + +// Implements LLVM's interface for fetching data from a stream source. +// Bitcode bytes from the RPC thread are placed here with PutBytes and buffered +// until the bitcode reader calls GetBytes to remove them. +class QueueStreamer : public llvm::DataStreamer { + public: + QueueStreamer() : Done(false), Prod(0), Cons(0) { + pthread_mutex_init(&Mutex, NULL); + pthread_cond_init(&Cond, NULL); + Bytes.resize(64 * 1024); + } + // Called by the compilation thread. Wait for len bytes to become available, + // and copy them into buf. If all bytes have been received and there are + // fewer than len bytes available, copy all remaining bytes. + // Return the number of bytes copied. + virtual size_t GetBytes(unsigned char *buf, size_t len); + + // Called by the RPC thread. Copy len bytes from buf and wake up the + // compilation thread if it is waiting. Return the number of bytes copied. + size_t PutBytes(unsigned char *buf, size_t len); + + // Called by the RPC thread. Signal that all bytes have been received, + // so the last call to GetBytes will return the remaining bytes rather + // than waiting for the entire requested amound. + void SetDone(); + + private: + bool Done; + pthread_mutex_t Mutex; + pthread_cond_t Cond; + + // Variables and functions to manage the circular queue + std::vector<unsigned char> Bytes; + size_t Prod; // Queue producer index + size_t Cons; // Queue consumer index + size_t queueSize() { + return Prod >= Cons ? Prod - Cons : Bytes.size() - (Cons - Prod); + } + size_t capacityRemaining() { + return (Prod >= Cons ? Bytes.size() - (Prod - Cons) : (Cons - Prod)) - 1; + } + void queuePut(unsigned char *buf, size_t len); + void queueGet(unsigned char *buf, size_t len); +}; + +// Class to manage the compliation thread and serve as the interface from +// the SRPC thread +class SRPCStreamer { +public: + SRPCStreamer() : Error(false) {} + // Initialize streamer, create a new thread running Callback, and + // return a pointer to the DataStreamer the threads will use to + // synchronize. On error, return NULL and fill in the ErrorMsg string + llvm::DataStreamer *init(void *(*Callback)(void *), + void *arg, std::string *ErrMsg); + // Called by the RPC thread. Copy len bytes from buf. Return bytes copied. + size_t gotChunk(unsigned char *bytes, size_t len); + // Called by the RPC thread. Wait for the compilation thread to finish. + int streamEnd(std::string *ErrMsg); + // Called by the compilation thread. Signal that there was a compilation + // error so the RPC thread can abort the stream. 
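QueueStreamer above keeps one slot of its circular buffer empty so that Prod == Cons always means "empty"; queueSize() and capacityRemaining() encode that invariant. A stand-alone check of the two formulas with made-up index values:

#include <cstddef>
#include <cstdio>

static size_t QueueSize(size_t Prod, size_t Cons, size_t Cap) {
  return Prod >= Cons ? Prod - Cons : Cap - (Cons - Prod);
}

static size_t CapacityRemaining(size_t Prod, size_t Cons, size_t Cap) {
  return (Prod >= Cons ? Cap - (Prod - Cons) : (Cons - Prod)) - 1;
}

int main() {
  // 8-byte buffer, producer wrapped around to index 2, consumer at index 6:
  // 4 bytes are queued and 3 more can be written (4 + 3 == 8 - 1).
  std::printf("size=%u free=%u\n", (unsigned)QueueSize(2, 6, 8),
              (unsigned)CapacityRemaining(2, 6, 8));
  return 0;
}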
+ void setError() { Error = true; } +private: + bool Error; + QueueStreamer Q; + pthread_t CompileThread; +}; + + + +#endif // SRPCSTREAMER_H diff --git a/tools/llc/StubMaker.cpp b/tools/llc/StubMaker.cpp new file mode 100644 index 0000000000..cc343280a3 --- /dev/null +++ b/tools/llc/StubMaker.cpp @@ -0,0 +1,233 @@ +// Create a high-level representation of the needed library. + +#include "StubMaker.h" + +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "ELFStub.h" + +using namespace llvm; + +// Extract the Name, Version, and IsDefault flag from the FullName string. +// e.g. foo@V1 --> foo, V1, false +// bar@@V2 --> bar, V2, true +static void ExtractVersion(StringRef FullName, + StringRef &Name, + StringRef &Version, + bool &IsDefault) { + size_t atpos = FullName.find('@'); + if (atpos == StringRef::npos) { + Name = FullName; + Version = ""; + IsDefault = false; + return; + } + Name = FullName.substr(0, atpos); + ++atpos; + if (FullName[atpos] == '@') { + IsDefault = true; + ++atpos; + } else { + IsDefault = false; + } + Version = FullName.substr(atpos); +} + + +// This implicitly creates a version record as a result of locating a symbol +// with this version. There is normally more information attached to a +// version definition: the parent version(s) and definition flags (weak +// or base). This information is currently not stored in the bitcode +// module. It may be necessary to add this in the future. +static Elf32_Half AddVersionDef(ELFStub *Stub, StringRef Name) { + VersionDefinition VD; + VD.Name = Name; + VD.Index = Stub->NextIndex++; + VD.IsWeak = false; // TODO(pdox): Implement + VD.Parents.clear(); // TODO(pdox): Implement + Stub->VerDefs.push_back(VD); + Stub->IndexMap[VD.Name] = VD.Index; + return VD.Index; +} + +static Elf32_Half GetVersionIndex(StringRef Version, ELFStub *Stub) { + // Handle unversioned symbols + if (Version.empty()) + return 1; /* ELF::VER_NDX_GLOBAL */ + // Find the version definition, if it already exists. + StringMap<Elf32_Half>::const_iterator I = Stub->IndexMap.find(Version); + if (I != Stub->IndexMap.end()) { + return I->second; + } + // If not, create it. + return AddVersionDef(Stub, Version); +} + +static Elf32_Half GetELFMachine(const Triple &T) { + switch (T.getArch()) { + default: llvm_unreachable("Unknown target triple in StubMaker.cpp"); + case Triple::x86_64: return ELF::EM_X86_64; + case Triple::x86: return ELF::EM_386; + case Triple::arm: return ELF::EM_ARM; + case Triple::mipsel: return ELF::EM_MIPS; + } +} + +static unsigned char GetELFVisibility(const GlobalValue *GV) { + switch (GV->getVisibility()) { + case GlobalValue::DefaultVisibility: return ELF::STV_DEFAULT; + case GlobalValue::HiddenVisibility: return ELF::STV_HIDDEN; + case GlobalValue::ProtectedVisibility: return ELF::STV_PROTECTED; + } + llvm_unreachable("Unknown visibility in GETELFVisibility"); +} + +static ELF::Elf32_Word GetElfSizeForType(const GlobalValue *GV, + const Type *ElemType) { + unsigned bit_size = ElemType->getPrimitiveSizeInBits(); + if (bit_size != 0) { + // Check against 0 to see if it was actually a primitive. + return bit_size / 8; + } + if (isa<PointerType>(ElemType)) { + // Pointers are 32-bit for NaCl. + return 4; + } + if (isa<FunctionType>(ElemType)) { + // This is not a data object, so just say unknown (0). 
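GetVersionIndex above hands out vd_ndx values: unversioned symbols map to index 1 (VER_NDX_GLOBAL) and each new version name gets the next free index starting at 2, in order of first appearance. A worked example with hypothetical input symbols foo@@V2, bar@V1 and an unversioned baz:

#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, unsigned> IndexMap;
  unsigned NextIndex = 2;                       // 0 and 1 are reserved
  const char *Versions[] = { "V2", "V1", "" };  // from foo@@V2, bar@V1, baz
  for (unsigned i = 0; i < 3; ++i) {
    std::string V = Versions[i];
    unsigned Idx;
    if (V.empty())
      Idx = 1;                                  // VER_NDX_GLOBAL
    else if (IndexMap.count(V))
      Idx = IndexMap[V];
    else
      IndexMap[V] = Idx = NextIndex++;
    std::printf("version '%s' -> vd_ndx %u\n", V.c_str(), Idx);  // 2, 3, 1
  }
  return 0;
}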
+ return 0; + } + if (const ArrayType *ATy = dyn_cast<ArrayType>(ElemType)) { + unsigned elem_size = GetElfSizeForType(GV, ATy->getElementType()); + unsigned num_elems = ATy->getNumElements(); + // TODO(jvoung): Come up with a test for what to do with 0-length arrays. + // Not sure what to do here actually. It may be that the 0-length + // array is meant to be an opaque type, which you can never check the + // "sizeof". For now, return 0 instead of asserting. + // Known instance of this in library code is in basic_string.h: + // static size_type _S_empty_rep_storage[]; + return elem_size * num_elems; + } + if (const VectorType *VTy = dyn_cast<VectorType>(ElemType)) { + unsigned bit_width = VTy->getBitWidth(); + if (bit_width) { + return bit_width / 8; + } else { + // It's a vector of pointers, and pointers are 32-bit in NaCl + return VTy->getNumElements() * 4; + } + } + if (const StructType *STy = dyn_cast<StructType>(ElemType)) { + // Alignment padding should have been added to the type in the front-end. + unsigned size_so_far = 0; + for (unsigned i = 0; i < STy->getNumElements(); ++i) { + size_so_far += GetElfSizeForType(GV, STy->getElementType(i)); + } + return size_so_far; + } + // Unknown type! + DEBUG({ + dbgs() << "Unknown GetELFSize for var="; + GV->dump(); + dbgs() << " type= "; + ElemType->dump(); + dbgs() << "\n"; + }); + llvm_unreachable("Unhandled type for GetELFSize"); + return 0; +} + +// Return a value for the symbol table's st_size, which is the number of bytes +// in a data object. Functions may report unknown size 0 (not data objects). +// This is known to be important for symbols that may sit in BSS +// with copy relocations (to know how much to copy). +static ELF::Elf32_Word GetELFSize(const GlobalValue *GV) { + const class PointerType *PT = GV->getType(); + const Type *ElemType = PT->getElementType(); + return GetElfSizeForType(GV, ElemType); +} + +static unsigned char GetELFType(const GlobalValue *GV) { + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) { + return GVar->isThreadLocal() ? ELF::STT_TLS : ELF::STT_OBJECT; + } else if (isa<Function>(GV)) { + // TODO(pdox): Handle STT_GNU_IFUNC + return ELF::STT_FUNC; + } + // TODO(pdox): Do we need to resolve GlobalAliases? + llvm_unreachable("Unknown GlobalValue type in GetELFType!"); +} + +static unsigned char GetELFBinding(const GlobalValue *GV) { + // TODO(pdox): + // This information would ideally be made to match the symbol binding + // as declared in the original shared object. However, GV is only the + // declaration for this symbol, so we cannot derive the definition's + // binding here. But it seems like it should be fine to always set it to + // STB_GLOBAL, since we already know this symbol is the prevailing + // definition. + return ELF::STB_GLOBAL; +} + +static void MakeOneStub(const Module &M, + const Module::NeededRecord &NR, + ELFStub *Stub) { + Stub->SOName = NR.DynFile; + Stub->NextIndex = 2; // 0,1 are reserved + for (unsigned j = 0; j < NR.Symbols.size(); ++j) { + StringRef FullName = NR.Symbols[j]; + GlobalValue *GV = M.getNamedValue(FullName); + if (!GV) { + // The symbol may have been removed by optimization or dead code + // elimination, so this is not an error. 
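The size rules above boil down to: primitives report PrimitiveSizeInBits/8, pointers are 4 bytes under NaCl's ILP32 model, functions report 0, and aggregates sum their elements (padding is assumed to have been made explicit by the front-end). A worked example for a hypothetical global of type { i32, i8*, [4 x i16] }:

#include <cstdio>

int main() {
  unsigned I32Field = 32 / 8;        // i32 -> 4 bytes
  unsigned PtrField = 4;             // i8* -> 4 bytes under NaCl
  unsigned ArrField = 4 * (16 / 8);  // [4 x i16] -> 8 bytes
  std::printf("st_size = %u\n", I32Field + PtrField + ArrField);  // 16
  return 0;
}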
+ continue; + } + StringRef Name; + StringRef Version; + bool IsDefault; + ExtractVersion(FullName, Name, Version, IsDefault); + + SymbolStub SS; + SS.Name = Name; + SS.Type = GetELFType(GV); + SS.Binding = GetELFBinding(GV); + SS.Visibility = GetELFVisibility(GV); + SS.Size = GetELFSize(GV); + SS.VersionIndex = GetVersionIndex(Version, Stub); + SS.IsDefault = IsDefault; + Stub->Symbols.push_back(SS); + } +} + +namespace llvm { + +// For module M, make all the stubs neededs and insert them into StubList. +void MakeAllStubs(const Module &M, const Triple &T, + SmallVectorImpl<ELFStub*> *StubList) { + std::vector<Module::NeededRecord> NRList; + M.getNeededRecords(&NRList); + Elf32_Half Machine = GetELFMachine(T); + for (unsigned i = 0; i < NRList.size(); ++i) { + const Module::NeededRecord &NR = NRList[i]; + ELFStub *Stub = new ELFStub(); + Stub->Machine = Machine; + MakeOneStub(M, NR, Stub); + StubList->push_back(Stub); + } +} + +void FreeStubList(llvm::SmallVectorImpl<ELFStub*> *StubList) { + for (unsigned i = 0; i < StubList->size(); ++i) { + delete (*StubList)[i]; + } + StubList->clear(); +} + +} // namespace diff --git a/tools/llc/StubMaker.h b/tools/llc/StubMaker.h new file mode 100644 index 0000000000..27e1e55d7f --- /dev/null +++ b/tools/llc/StubMaker.h @@ -0,0 +1,20 @@ +#ifndef __STUB_MAKER_H +#define __STUB_MAKER_H + +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class Module; +class Triple; +class ELFStub; + +// For module M, make all required ELF stubs and insert them into StubList. +void MakeAllStubs(const Module &M, + const Triple &T, + SmallVectorImpl<ELFStub*> *StubList); +void FreeStubList(SmallVectorImpl<ELFStub*> *StubList); + +} + +#endif diff --git a/tools/llc/TextStubWriter.cpp b/tools/llc/TextStubWriter.cpp new file mode 100644 index 0000000000..ae6e2f77d3 --- /dev/null +++ b/tools/llc/TextStubWriter.cpp @@ -0,0 +1,84 @@ +// Using the high-level representation of an ELF stub, create a text version +// of the ELF stub object. + +#include "TextStubWriter.h" + +#include <sstream> + +#include "ELFStub.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +namespace { + +std::string LibShortname(const std::string &fullname) { + std::string result = fullname; + if (result.find("lib") != std::string::npos) { + result = result.substr(3); + } + size_t so_pos = result.find(".so"); + if (so_pos != std::string::npos) { + result = result.substr(0, so_pos); + } + return result; +} + +const ELF::Elf32_Half kDummyCodeShndx = 5; +const ELF::Elf32_Half kDummyDataShndx = 6; + +} // namespace + +namespace llvm { + +// Write out the dynamic symbol table information. The format must be kept +// in sync with the changes in NaCl's version of gold (see gold/metadata.cc). +void WriteTextELFStub(const ELFStub *Stub, std::string *output) { + std::stringstream ss; + + ss << "#### Symtab for " << Stub->SOName << "\n"; + ss << "@obj " << LibShortname(Stub->SOName) << " " << Stub->SOName << "\n"; + + // st_value is usually a relative address for .so, and .exe files. + // So, make some up. + ELF::Elf32_Addr fake_relative_addr = 0; + for (size_t i = 0; i < Stub->Symbols.size(); ++i) { + const SymbolStub &sym = Stub->Symbols[i]; + + ELF::Elf32_Addr st_value = fake_relative_addr; + ELF::Elf32_Word st_size = sym.Size; + unsigned int st_info = sym.Type | (sym.Binding << 4); + unsigned int st_other = sym.Visibility; + ELF::Elf32_Half st_shndx = sym.Type == ELF::STT_FUNC ? + kDummyCodeShndx : kDummyDataShndx; + ELF::Elf32_Half vd_ndx = sym.VersionIndex; + // Mark non-default versions hidden. 
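Each "@sym" record above packs the usual ELF symbol-table fields into text. For a hypothetical default-versioned function foo@@V1 (STT_FUNC = 2, STB_GLOBAL = 1, STV_DEFAULT = 0), st_info = Type | (Binding << 4) = 18, the dummy code section index 5 is used, st_size is 0 because functions report no data size, and vd_ndx stays unhidden. A small sketch of the encoding, with illustrative values rather than output from a real module:

#include <cstdio>

int main() {
  unsigned Type = 2, Binding = 1, Visibility = 0;  // STT_FUNC, STB_GLOBAL, STV_DEFAULT
  unsigned StInfo = Type | (Binding << 4);         // 18
  unsigned Shndx = (Type == 2) ? 5 : 6;            // dummy code vs. data section
  unsigned VdNdx = 2;                              // first non-reserved version index
  // Field order: name, st_value, st_size, st_info, st_other, st_shndx, vd_ndx
  std::printf("@sym foo 0 0 %u %u %u %u\n", StInfo, Visibility, Shndx, VdNdx);
  // -> @sym foo 0 0 18 0 5 2
  return 0;
}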
+ if (!sym.IsDefault) { + vd_ndx |= ELF::VERSYM_HIDDEN; + } + + ss << "@sym " + << sym.Name << " " // Representative for st_name. + << (st_value) << " " + << (st_size) << " " + << (st_info) << " " + << (st_other) << " " + << (st_shndx) << " " + << (vd_ndx) << " " + << "\n"; + fake_relative_addr += (sym.Size == 0 ? 4 : sym.Size); + } + + // Now dump the version map. + ss << "#### VerDefs for " << Stub->SOName << "\n"; + for (size_t i = 0; i < Stub->VerDefs.size(); ++i) { + const VersionDefinition &verdef = Stub->VerDefs[i]; + ss << "@ver " << (Elf32_Half)(verdef.Index) << " " << verdef.Name << "\n"; + } + + ss << "\n"; + + output->append(ss.str()); +} + +} // namespace llvm diff --git a/tools/llc/TextStubWriter.h b/tools/llc/TextStubWriter.h new file mode 100644 index 0000000000..4dbc5978b2 --- /dev/null +++ b/tools/llc/TextStubWriter.h @@ -0,0 +1,12 @@ +#ifndef __TEXT_STUB_WRITER_H +#define __TEXT_STUB_WRITER_H + +#include "ELFStub.h" + +namespace llvm { + +void WriteTextELFStub(const ELFStub *Stub, std::string *output); + +} + +#endif diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index 4d4a74c009..bd2fa4c1cd 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -20,8 +20,10 @@ #include "llvm/Pass.h" #include "llvm/ADT/Triple.h" #include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/DataStream.h" // @LOCALMOD #include "llvm/Support/IRReader.h" #include "llvm/CodeGen/CommandFlags.h" +#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD #include "llvm/CodeGen/LinkAllAsmWriterComponents.h" #include "llvm/CodeGen/LinkAllCodegenComponents.h" #include "llvm/MC/SubtargetFeature.h" @@ -29,7 +31,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/ManagedStatic.h" +#if !defined(__native_client__) #include "llvm/Support/PluginLoader.h" +#endif #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/Host.h" @@ -39,8 +43,33 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include <memory> + +// @LOCALMOD-BEGIN +#include "StubMaker.h" +#include "TextStubWriter.h" +// @LOCALMOD-END + using namespace llvm; +// @LOCALMOD-BEGIN +// NOTE: this tool can be build as a "sandboxed" translator. +// There are two ways to build the translator +// SRPC-style: no file operations are allowed +// see nacl_file.cc for support code +// non-SRPC-style: some basic file operations are allowed +// This can be useful for debugging but will +// not be deployed. +#if defined(__native_client__) && defined(NACL_SRPC) +MemoryBuffer* NaClGetMemoryBufferForFile(const char* filename); +void NaClOutputStringToFile(const char* filename, const std::string& data); +// The following two functions communicate metadata to the SRPC wrapper for LLC. +void NaClRecordObjectInformation(bool is_shared, const std::string& soname); +void NaClRecordSharedLibraryDependency(const std::string& library_name); +DataStreamer* NaClBitcodeStreamer; +#endif +// @LOCALMOD-END + + // General options for llc. Other pass-specific options are specified // within the corresponding llc passes, and target-specific options // and back-end code generation options are specified with the target machine. 
@@ -51,6 +80,32 @@ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); +// @LOCALMOD-BEGIN +static cl::opt<std::string> +MetadataTextFilename("metadata-text", cl::desc("Metadata as text, out filename"), + cl::value_desc("filename")); + +// Using bitcode streaming has a couple of ramifications. Primarily it means +// that the module in the file will be compiled one function at a time rather +// than the whole module. This allows earlier functions to be compiled before +// later functions are read from the bitcode but of course means no whole-module +// optimizations. For now, streaming is only supported for files and stdin. +static cl::opt<bool> +LazyBitcode("streaming-bitcode", + cl::desc("Use lazy bitcode streaming for file inputs"), + cl::init(false)); + +// The option below overlaps very much with bitcode streaming. +// We keep it separate because it is still experimental and we want +// to use it without changing the outside behavior which is especially +// relevant for the sandboxed case. +static cl::opt<bool> +ReduceMemoryFootprint("reduce-memory-footprint", + cl::desc("Aggressively reduce memory used by llc"), + cl::init(false)); + +// @LOCALMOD-END + // Determine optimization level. static cl::opt<char> OptLevel("O", @@ -149,9 +204,60 @@ static tool_output_file *GetOutputStream(const char *TargetName, return FDOut; } +// @LOCALMOD-BEGIN +#if defined(__native_client__) && defined(NACL_SRPC) +void RecordMetadataForSrpc(const Module &mod) { + bool is_shared = (mod.getOutputFormat() == Module::SharedOutputFormat); + std::string soname = mod.getSOName(); + NaClRecordObjectInformation(is_shared, soname); + for (Module::lib_iterator L = mod.lib_begin(), + E = mod.lib_end(); + L != E; ++L) { + NaClRecordSharedLibraryDependency(*L); + } +} +#endif // defined(__native_client__) && defined(NACL_SRPC) +// @LOCALMOD-END + + +// @LOCALMOD-BEGIN + +// Write the ELF Stubs to the metadata file, in text format +// Returns 0 on success, non-zero on error. +int WriteTextMetadataFile(const Module &M, const Triple &TheTriple) { + // Build the ELF stubs (in high level format) + SmallVector<ELFStub*, 8> StubList; + // NOTE: The triple is unnecessary for the text version. + MakeAllStubs(M, TheTriple, &StubList); + // For each stub, write the ELF object to the metadata file. + std::string s; + for (unsigned i = 0; i < StubList.size(); i++) { + WriteTextELFStub(StubList[i], &s); + } + FreeStubList(&StubList); + +#if defined(__native_client__) && defined(NACL_SRPC) + NaClOutputStringToFile(MetadataTextFilename.c_str(), s); +#else + std::string error; + OwningPtr<tool_output_file> MOut( + new tool_output_file(MetadataTextFilename.c_str(), error, + raw_fd_ostream::F_Binary)); + if (!error.empty()) { + errs() << error << '\n'; + return 1; + } + MOut->os().write(s.data(), s.size()); + MOut->keep(); +#endif + return 0; +} + +// @LOCALMOD-END + // main - Entry point for the llc compiler. 
// -int main(int argc, char **argv) { +int llc_main(int argc, char **argv) { sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); @@ -192,13 +298,66 @@ int main(int argc, char **argv) { // If user just wants to list available options, skip module loading if (!SkipModule) { + // @LOCALMOD-BEGIN +#if defined(__native_client__) && defined(NACL_SRPC) + if (LazyBitcode) { + std::string StrError; + M.reset(getStreamedBitcodeModule(std::string("<SRPC stream>"), + NaClBitcodeStreamer, Context, &StrError)); + if (!StrError.empty()) { + Err = SMDiagnostic(InputFilename, SourceMgr::DK_Error, StrError); + } + } else { + // In the NACL_SRPC case, open the file with our special wrapper, which + // is aware of pre-opened file descriptors. + // NOTE: we could remove this if we only support streaming. + // ParseIR() should take ownership of the MemoryBuffer. + M.reset(ParseIR(NaClGetMemoryBufferForFile(InputFilename.c_str()), + Err, + Context)); + M->setModuleIdentifier(InputFilename); + } +#else + if (LazyBitcode) { + std::string StrError; + DataStreamer *streamer = getDataFileStreamer(InputFilename, &StrError); + if (streamer) { + M.reset(getStreamedBitcodeModule(InputFilename, streamer, Context, + &StrError)); + } + if (!StrError.empty()) { + Err = SMDiagnostic(InputFilename, SourceMgr::DK_Error, StrError); + } + } else { M.reset(ParseIRFile(InputFilename, Err, Context)); + } +#endif + // @LOCALMOD-END + mod = M.get(); if (mod == 0) { Err.print(argv[0], errs()); return 1; } + // @LOCALMOD-BEGIN +#if defined(__native_client__) && defined(NACL_SRPC) + RecordMetadataForSrpc(*mod); + + // To determine if we should compile PIC or not, we needed to load at + // least the metadata. Since we've already constructed the commandline, + // we have to hack this in after commandline processing. + if (mod->getOutputFormat() == Module::SharedOutputFormat) { + RelocModel = Reloc::PIC_; + } + // Also set PIC_ for dynamic executables: + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2351 + if (mod->lib_size() > 0) { + RelocModel = Reloc::PIC_; + } +#endif // defined(__native_client__) && defined(NACL_SRPC) + // @LOCALMOD-END + // If we are supposed to override the target triple, do so now. if (!TargetTriple.empty()) mod->setTargetTriple(Triple::normalize(TargetTriple)); @@ -223,6 +382,11 @@ int main(int argc, char **argv) { std::string FeaturesStr; if (MAttrs.size()) { SubtargetFeatures Features; + // @LOCALMOD-BEGIN + // Use the same default attribute settings as libLTO. + // TODO(pdox): Figure out why this isn't done for upstream llc. + Features.getDefaultSubtargetFeatures(TheTriple); + // @LOCALMOD-END for (unsigned i = 0; i != MAttrs.size(); ++i) Features.AddFeature(MAttrs[i]); FeaturesStr = Features.getString(); @@ -289,30 +453,38 @@ int main(int argc, char **argv) { TheTriple.isMacOSXVersionLT(10, 6)) Target.setMCUseLoc(false); +#if !defined(NACL_SRPC) // Figure out where we are going to send the output. OwningPtr<tool_output_file> Out (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0])); if (!Out) return 1; +#endif // Build up all of the passes that we want to do to the module. - PassManager PM; + // @LOCALMOD-BEGIN + OwningPtr<PassManagerBase> PM; + if (LazyBitcode || ReduceMemoryFootprint) + PM.reset(new FunctionPassManager(mod)); + else + PM.reset(new PassManager()); + // @LOCALMOD-END // Add an appropriate TargetLibraryInfo pass for the module's triple. 
TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple); if (DisableSimplifyLibCalls) TLI->disableAllFunctions(); - PM.add(TLI); + PM->add(TLI); if (target.get()) { - PM.add(new TargetTransformInfo(target->getScalarTargetTransformInfo(), + PM->add(new TargetTransformInfo(target->getScalarTargetTransformInfo(), target->getVectorTargetTransformInfo())); } // Add the target data from the target machine, if it exists, or the module. if (const DataLayout *TD = Target.getDataLayout()) - PM.add(new DataLayout(*TD)); + PM->add(new DataLayout(*TD)); else - PM.add(new DataLayout(mod)); + PM->add(new DataLayout(mod)); // Override default to generate verbose assembly. Target.setAsmVerbosityDefault(true); @@ -325,6 +497,39 @@ int main(int argc, char **argv) { Target.setMCRelaxAll(true); } + + +#if defined __native_client__ && defined(NACL_SRPC) + { + std::string s; + raw_string_ostream ROS(s); + formatted_raw_ostream FOS(ROS); + // Ask the target to add backend passes as necessary. + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify)) { + errs() << argv[0] << ": target does not support generation of this" + << " file type!\n"; + return 1; + } + + if (LazyBitcode || ReduceMemoryFootprint) { + FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get()); + P->doInitialization(); + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { + P->run(*I); + if (ReduceMemoryFootprint) { + I->Dematerialize(); + } + } + P->doFinalization(); + } else { + static_cast<PassManager*>(PM.get())->run(*mod); + } + FOS.flush(); + ROS.flush(); + NaClOutputStringToFile(OutputFilename.c_str(), ROS.str()); + } +#else + { formatted_raw_ostream FOS(Out->os()); @@ -349,7 +554,7 @@ int main(int argc, char **argv) { } // Ask the target to add backend passes as necessary. - if (Target.addPassesToEmitFile(PM, FOS, FileType, NoVerify, + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify, StartAfterID, StopAfterID)) { errs() << argv[0] << ": target does not support generation of this" << " file type!\n"; @@ -359,11 +564,50 @@ int main(int argc, char **argv) { // Before executing passes, print the final values of the LLVM options. cl::PrintOptionValues(); - PM.run(*mod); + if (LazyBitcode || ReduceMemoryFootprint) { + FunctionPassManager *P = static_cast<FunctionPassManager*>(PM.get()); + P->doInitialization(); + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { + P->run(*I); + if (ReduceMemoryFootprint) { + I->Dematerialize(); + } + } + P->doFinalization(); + } else { + static_cast<PassManager*>(PM.get())->run(*mod); + } } // Declare success. Out->keep(); +#endif + + // @LOCALMOD-BEGIN + // Write out the metadata. + // + // We need to ensure that intrinsic prototypes are available, in case + // we have a NeededRecord for one of them. + // They may have been eliminated by the StripDeadPrototypes pass, + // or some other pass that is unaware of NeededRecords / IntrinsicLowering. + if (!MetadataTextFilename.empty()) { + IntrinsicLowering IL(*target->getDataLayout()); + IL.AddPrototypes(*M); + + int err = WriteTextMetadataFile(*M.get(), TheTriple); + if (err != 0) + return err; + } + // @LOCALMOD-END return 0; } + +#if !defined(NACL_SRPC) +int +main (int argc, char **argv) { + return llc_main(argc, argv); +} +#else +// main() is in nacl_file.cpp. +#endif diff --git a/tools/llc/nacl_file.cpp b/tools/llc/nacl_file.cpp new file mode 100644 index 0000000000..13dcda128a --- /dev/null +++ b/tools/llc/nacl_file.cpp @@ -0,0 +1,480 @@ +/* Copyright 2012 The Native Client Authors. 
All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + * + * This file provides wrappers to open() to use pre-opened file descriptors + * for the input bitcode and the output file. + * + * It also has the SRPC interfaces, but that should probably be refactored + * into a separate file. + */ + +#if defined(__native_client__) && defined(NACL_SRPC) + +#include <argz.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +// Headers which are not properly part of the SDK are included by their +// path in the nacl tree +#include "native_client/src/shared/srpc/nacl_srpc.h" +#ifdef __pnacl__ +#include <nacl/pnacl.h> +#endif +#include "SRPCStreamer.h" + + +#include <string> +#include <map> +#include <vector> + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/system_error.h" + + +using llvm::MemoryBuffer; +using llvm::StringRef; +using std::string; +using std::map; + +#define printerr(...) fprintf(stderr, __VA_ARGS__) +// Temporarily enabling debug prints to debug temp-file usage on windows bots. +#define printdbg(...) fprintf(stderr, __VA_ARGS__) + +#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) + +namespace { + +typedef std::vector<std::string> string_vector; + +// True if the bitcode to be compiled is for a shared library. +// Used to return to the coordinator. +bool g_bitcode_is_shared_library; +// The soname of the current compilation unit, if it is a shared library. +// Empty string otherwise. +std::string* g_bitcode_soname = NULL; +// The newline separated list of libraries that the current bitcode compilation +// unit depends on. +std::string* g_bitcode_lib_dependencies = NULL; +// The filename used internally for looking up the bitcode file. +char kBitcodeFilename[] = "pnacl.pexe"; +// The filename used internally for looking up the object code file. +char kObjectFilename[] = "pnacl.o"; +// Object which manages streaming bitcode over SRPC and threading. +SRPCStreamer *srpc_streamer; + +} // namespace + +//TODO(dschuff): a little more elegant interface into llc than this? +extern llvm::DataStreamer* NaClBitcodeStreamer; + +class FileInfo { + private: + static map<string, FileInfo*> descriptor_map_; + + string filename_; + int fd_; + + public: + // Construct a FileInfo for a file descriptor. + // File descriptors are used for the bitcode (input) file and for the + // object (output) file passed in by the coordinator when using the Run + // SRPC. 
+ FileInfo(string fn, int fd) : + filename_(fn), fd_(fd) { + printdbg("LLVM-SB-DBG: registering file %d (%s)\n", fd, fn.c_str()); + descriptor_map_[fn] = this; + } + + int GetFd() { + return fd_; + } + + MemoryBuffer* ReadAllDataAsMemoryBuffer() { + printdbg("LLVM-SB-DBG: opening file %d (%s)\n", fd_, filename_.c_str()); + llvm::OwningPtr<MemoryBuffer> mb; + if (llvm::error_code::success() != MemoryBuffer::getOpenFile( + fd_, filename_.c_str(), mb, + -1, -1, 0, false)) { + perror("LLVM-SB-ERROR: ReadAllDataAsMemoryBuffer getOpenFile failed!\n"); + return 0; + } + return mb.take(); + } + + void WriteAllDataToTmpFile(string data) { + printdbg("LLVM-SB-DBG: writing file %d (%s): %d bytes\n", + fd_, filename_.c_str(), data.size()); + + if (fd_ < 0) { + printerr("LLVM-SB-ERROR: invalid fd for write\n"); + return; + } + size_t bytes_to_write = data.size(); + const char* buf = data.c_str(); + while (bytes_to_write > 0) { + ssize_t bytes_written = write(fd_, (const void*) buf, bytes_to_write); + printdbg("LLVM-SB-DBG: write call to file %d (req: %zu, got: %zd)\n", + fd_, bytes_to_write, bytes_written); + if (bytes_written < 0) { + printerr("LLVM-SB-ERROR: write to file %d failed with %zd\n", + fd_, bytes_written); + perror("LLVM-SB-ERROR: WriteAllDataToTmpFile write failed"); + return; + } + buf += bytes_written; + bytes_to_write -= (size_t) bytes_written; + } + } + + void WriteAllData(string data) { + WriteAllDataToTmpFile(data); + } + + static FileInfo* FindFileInfo(const string& fn) { + map<string, FileInfo*>::iterator it = descriptor_map_.find(fn); + if (it == descriptor_map_.end()) { + printerr("LLVM-SB-ERROR: no mapping for filename\n"); + return NULL; + } + return it->second; + } + +}; + +map<string, FileInfo*> FileInfo::descriptor_map_; + +extern int llc_main(int argc, char **argv); + + +MemoryBuffer* NaClGetMemoryBufferForFile(const char* filename) { + FileInfo* fi = FileInfo::FindFileInfo(filename); + if (fi == NULL) { + printerr("LLVM-SB-ERROR: unknown file %s\n", filename); + return NULL; + } + return fi->ReadAllDataAsMemoryBuffer(); +} + +void NaClOutputStringToFile(const char* filename, const string& data) { + FileInfo* fi = FileInfo::FindFileInfo(filename); + fi->WriteAllData(data); +} + +void NaClRecordObjectInformation(bool is_shared, const std::string& soname) { + // This function is invoked to begin recording library information. + // To make it reentrant, we clean up what might be left over from last time. + delete g_bitcode_soname; + delete g_bitcode_lib_dependencies; + // Then remember the module global information. + g_bitcode_is_shared_library = is_shared; + g_bitcode_soname = new std::string(soname); + g_bitcode_lib_dependencies = new std::string(); +} + +void NaClRecordSharedLibraryDependency(const std::string& library_name) { + const std::string& kDelimiterString("\n"); + *g_bitcode_lib_dependencies += (library_name + kDelimiterString); +} + +namespace { + +int DoTranslate(string_vector* cmd_line_vec, int bitcode_fd, int object_fd) { + if (cmd_line_vec == NULL) { + return 1; + } + if (bitcode_fd) { + // Add mapping for bitcode file (side effect is to register the file). + new FileInfo(kBitcodeFilename, bitcode_fd); + } + // Add mapping for object file (side effect is to register the file). + new FileInfo(kObjectFilename, object_fd); + // Make an argv array from the input vector. + size_t argc = cmd_line_vec->size(); + char** argv = new char*[argc]; + for (size_t i = 0; i < argc; ++i) { + // llc_main will not mutate the command line, so this is safe. 
+ argv[i] = const_cast<char*>((*cmd_line_vec)[i].c_str()); + } + argv[argc] = NULL; + // Call main. + return llc_main(static_cast<int>(argc), argv); +} + +string_vector* CommandLineFromArgz(char* str, size_t str_len) { + char* entry = str; + string_vector* vec = new string_vector; + while (entry != NULL) { + vec->push_back(entry); + entry = argz_next(str, str_len, entry); + } + // Add fixed arguments to the command line. These specify the bitcode + // and object code filenames, removing them from the contract with the + // coordinator. + vec->push_back(kBitcodeFilename); + vec->push_back("-o"); + vec->push_back(kObjectFilename); + return vec; +} + +void run(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + int bitcode_fd = in_args[0]->u.hval; + int object_fd = in_args[1]->u.hval; + char* command_line = in_args[2]->arrays.carr; + size_t command_line_len = in_args[2]->u.count; + string_vector* cmd_line_vec = + CommandLineFromArgz(command_line, command_line_len); + if (DoTranslate(cmd_line_vec, bitcode_fd, object_fd) != 0) { + printerr("DoTranslate failed.\n"); + return; + } + delete cmd_line_vec; + out_args[0]->u.ival = g_bitcode_is_shared_library; + // SRPC deletes the strings returned when the closure is invoked. + // Therefore we need to use strdup. + out_args[1]->arrays.str = strdup(g_bitcode_soname->c_str()); + out_args[2]->arrays.str = strdup(g_bitcode_lib_dependencies->c_str()); + rpc->result = NACL_SRPC_RESULT_OK; +} + +string_vector* GetDefaultCommandLine() { + string_vector* command_line = new string_vector; + size_t i; + // First, those common to all architectures. + static const char* common_args[] = { "pnacl_translator", + "-filetype=obj", + kBitcodeFilename, + "-o", + kObjectFilename }; + for (i = 0; i < ARRAY_SIZE(common_args); ++i) { + command_line->push_back(common_args[i]); + } + // Then those particular to a platform. 
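
For readers unfamiliar with argz: the command line arrives over SRPC as an argz buffer, a flat char array of NUL-separated strings, which CommandLineFromArgz() above walks with argz_next() before appending the fixed pnacl.pexe / -o pnacl.o arguments. A small stand-alone illustration of the format (the buffer contents and flags are invented for the example; argz.h is a glibc/newlib extension):

#include <argz.h>
#include <cstdio>

int main() {
  // "llc\0-O2\0-mtriple=i686-none-nacl-gnu\0" as it would arrive on the wire.
  const char buf[] = "llc\0-O2\0-mtriple=i686-none-nacl-gnu";
  size_t len = sizeof(buf);     // length includes the final NUL of the last entry
  for (const char *entry = buf; entry != NULL;
       entry = argz_next(buf, len, entry))
    printf("arg: %s\n", entry);
  return 0;
}
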
+ static const char* llc_args_x8632[] = { "-march=x86", + "-mcpu=pentium4", + "-mtriple=i686-none-nacl-gnu", + NULL }; + static const char* llc_args_x8664[] = { "-march=x86-64", + "-mcpu=core2", + "-mtriple=x86_64-none-nacl-gnu", + NULL }; + static const char* llc_args_arm[] = { "-mcpu=cortex-a8", + "-mtriple=armv7a-none-nacl-gnueabi", + "-arm-reserve-r9", + "-sfi-disable-cp", + "-sfi-store", + "-sfi-load", + "-sfi-stack", + "-sfi-branch", + "-sfi-data", + "-no-inline-jumptables", + "-float-abi=hard", + NULL }; + + const char **llc_args = NULL; +#if defined (__pnacl__) + switch (__builtin_nacl_target_arch()) { + case PnaclTargetArchitectureX86_32: { + llc_args = llc_args_x8632; + break; + } + case PnaclTargetArchitectureX86_64: { + llc_args = llc_args_x8664; + break; + } + case PnaclTargetArchitectureARM_32: { + llc_args = llc_args_arm; + break; + } + default: + printerr("no target architecture match.\n"); + delete command_line; + command_line = NULL; + break; + } +#elif defined (__i386__) + llc_args = llc_args_x8632; +#elif defined (__x86_64__) + llc_args = llc_args_x8664; +#else +#error +#endif + for (i = 0; llc_args[i] != NULL; i++) command_line->push_back(llc_args[i]); + return command_line; +} + +void run_with_default_command_line(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + int bitcode_fd = in_args[0]->u.hval; + int object_fd = in_args[1]->u.hval; + string_vector* cmd_line_vec = GetDefaultCommandLine(); + if (DoTranslate(cmd_line_vec, bitcode_fd, object_fd) != 0) { + printerr("DoTranslate failed.\n"); + return; + } + delete cmd_line_vec; + out_args[0]->u.ival = g_bitcode_is_shared_library; + // SRPC deletes the strings returned when the closure is invoked. + // Therefore we need to use strdup. + out_args[1]->arrays.str = strdup(g_bitcode_soname->c_str()); + out_args[2]->arrays.str = strdup(g_bitcode_lib_dependencies->c_str()); + rpc->result = NACL_SRPC_RESULT_OK; +} + +// Data passed from main thread to compile thread. +// Takes ownership of the commandline vector. +class StreamingThreadData { + public: + StreamingThreadData(int object_fd, string_vector* cmd_line_vec) : + object_fd_(object_fd), cmd_line_vec_(cmd_line_vec) {} + int ObjectFD() const { return object_fd_; } + string_vector* CmdLineVec() const { return cmd_line_vec_.get(); } + const int object_fd_; + const llvm::OwningPtr<string_vector> cmd_line_vec_; +}; + +void *run_streamed(void *arg) { + StreamingThreadData* data = reinterpret_cast<StreamingThreadData*>(arg); + data->CmdLineVec()->push_back("-streaming-bitcode"); + if (DoTranslate(data->CmdLineVec(), 0, data->ObjectFD()) != 0) { + printerr("DoTranslate failed.\n"); + srpc_streamer->setError(); + return NULL; + } + delete data; + return NULL; +} + +// Actually do the work for stream initialization. 
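
The streaming RPCs that follow hand bitcode to the compile thread through SRPCStreamer, whose implementation lives in SRPCStreamer.h/.cpp and is not part of this hunk. As a rough mental model only, and not the actual SRPCStreamer API, the hand-off amounts to a mutex/condvar protected byte queue: the SRPC thread appends each StreamChunk payload, and the compile thread blocks in its DataStreamer until bytes (or end-of-stream) arrive. A minimal sketch under those assumptions:

#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <deque>

struct ChunkQueue {
  pthread_mutex_t lock;
  pthread_cond_t ready;
  std::deque<char> bytes;
  bool done;
  ChunkQueue() : done(false) {
    pthread_mutex_init(&lock, NULL);
    pthread_cond_init(&ready, NULL);
  }
  // SRPC thread: called once per StreamChunk RPC.
  void Put(const char *data, size_t len) {
    pthread_mutex_lock(&lock);
    bytes.insert(bytes.end(), data, data + len);
    pthread_cond_signal(&ready);
    pthread_mutex_unlock(&lock);
  }
  // SRPC thread: called when StreamEnd arrives.
  void Finish() {
    pthread_mutex_lock(&lock);
    done = true;
    pthread_cond_signal(&ready);
    pthread_mutex_unlock(&lock);
  }
  // Compile thread: blocks until data is available or the stream ends.
  size_t Get(char *buf, size_t want) {
    pthread_mutex_lock(&lock);
    while (bytes.empty() && !done)
      pthread_cond_wait(&ready, &lock);
    size_t n = bytes.size() < want ? bytes.size() : want;
    for (size_t i = 0; i < n; ++i) { buf[i] = bytes.front(); bytes.pop_front(); }
    pthread_mutex_unlock(&lock);
    return n;
  }
};

static void *CompileThread(void *arg) {
  ChunkQueue *q = static_cast<ChunkQueue*>(arg);
  char buf[8];
  size_t n, total = 0;
  while ((n = q->Get(buf, sizeof buf)) > 0)
    total += n;                 // a real consumer would parse bitcode here
  printf("compile thread saw %zu bytes\n", total);
  return NULL;
}

int main() {
  ChunkQueue q;
  pthread_t tid;
  pthread_create(&tid, NULL, CompileThread, &q);
  const char chunk[] = "fake bitcode chunk";   // stand-in for SRPC payloads
  q.Put(chunk, strlen(chunk));
  q.Put(chunk, strlen(chunk));
  q.Finish();
  pthread_join(tid, NULL);
  return 0;
}
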
+void do_stream_init(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done, + string_vector* command_line_vec) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + srpc_streamer = new SRPCStreamer(); + std::string StrError; + StreamingThreadData* thread_data = new StreamingThreadData( + in_args[0]->u.hval, command_line_vec); + NaClBitcodeStreamer = srpc_streamer->init(run_streamed, + reinterpret_cast<void *>(thread_data), + &StrError); + if (NaClBitcodeStreamer) { + rpc->result = NACL_SRPC_RESULT_OK; + out_args[0]->arrays.str = strdup("no error"); + } else { + out_args[0]->arrays.str = strdup(StrError.c_str()); + } +} + +// Invoked by the StreamInit RPC to initialize bitcode streaming over SRPC. +// Under the hood it forks a new thread at starts the llc_main, which sets +// up the compilation and blocks when it tries to start reading the bitcode. +// Input arg is a file descriptor to write the output object file to. +// Returns a string, containing an error message if the call fails. +void stream_init(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + // cmd_line_vec allocated by GetDefaultCommandLine() is freed by the + // translation thread in run_streamed() + do_stream_init(rpc, in_args, out_args, done, GetDefaultCommandLine()); +} + +// Invoked by StreamInitWithCommandLine RPC. Same as stream_init, but +// provides a command line to use instead of the default. +void stream_init_with_command_line(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + char* command_line = in_args[1]->arrays.carr; + size_t command_line_len = in_args[1]->u.count; + string_vector* cmd_line_vec = + CommandLineFromArgz(command_line, command_line_len); + // cmd_line_vec is freed by the translation thread in run_streamed + do_stream_init(rpc, in_args, out_args, done, cmd_line_vec); +} + +// Invoked by the StreamChunk RPC. Receives a chunk of the bitcode and +// buffers it for later retrieval by the compilation thread. +void stream_chunk(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + size_t len = in_args[0]->u.count; + unsigned char *bytes = reinterpret_cast<unsigned char*>( + in_args[0]->arrays.carr); + if (srpc_streamer->gotChunk(bytes, len) != len) { + return; + } + rpc->result = NACL_SRPC_RESULT_OK; +} + +// Invoked by the StreamEnd RPC. Waits until the compilation finishes, +// then returns. Returns an int indicating whether the bitcode is a +// shared library, a string with the soname, a string with dependencies, +// and a string which contains an error message if applicable. +void stream_end(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + std::string StrError; + if (srpc_streamer->streamEnd(&StrError)) { + out_args[3]->arrays.str = strdup(StrError.c_str()); + return; + } + out_args[0]->u.ival = g_bitcode_is_shared_library; + // SRPC deletes the strings returned when the closure is invoked. + // Therefore we need to use strdup. 
+ out_args[1]->arrays.str = strdup(g_bitcode_soname->c_str()); + out_args[2]->arrays.str = strdup(g_bitcode_lib_dependencies->c_str()); + rpc->result = NACL_SRPC_RESULT_OK; +} + +const struct NaClSrpcHandlerDesc srpc_methods[] = { + { "Run:hhC:iss", run }, + { "RunWithDefaultCommandLine:hh:iss", run_with_default_command_line }, + // Protocol for streaming: + // (StreamInit(obj_fd) -> error_str | + // StreamInitWIthCommandLine(obj_fd, escaped_cmdline) -> error_str) + // StreamChunk(data) + + // StreamEnd() -> (is_shared_lib,soname,dependencies,error_str) + { "StreamInit:h:s", stream_init }, + { "StreamInitWithCommandLine:hC:s:", stream_init_with_command_line }, + { "StreamChunk:C:", stream_chunk }, + { "StreamEnd::isss", stream_end }, + { NULL, NULL }, +}; + +} // namespace + +int +main() { + if (!NaClSrpcModuleInit()) { + return 1; + } + + if (!NaClSrpcAcceptClientConnection(srpc_methods)) { + return 1; + } + NaClSrpcModuleFini(); + return 0; +} + +#endif diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 41f023d4c4..75ceda61ad 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -51,6 +51,13 @@ static cl::opt<bool> ShowAnnotations("show-annotations", cl::desc("Add informational comments to the .ll file")); +// @LOCALMOD-BEGIN +// Print bitcode metadata only, in text format. +// (includes output format, soname, and dependencies). +static cl::opt<bool> +DumpMetadata("dump-metadata", cl::desc("Dump bitcode metadata")); +// @LOCALMOD-END + namespace { static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { @@ -154,7 +161,7 @@ int main(int argc, char **argv) { OutputFilename = "-"; if (OutputFilename.empty()) { // Unspecified output, infer it. - if (InputFilename == "-") { + if (InputFilename == "-" || DumpMetadata) { // @LOCALMOD OutputFilename = "-"; } else { const std::string &IFN = InputFilename; @@ -176,6 +183,14 @@ int main(int argc, char **argv) { return 1; } + // @LOCALMOD-BEGIN + if (DumpMetadata) { + M->dumpMeta(Out->os()); + Out->keep(); + return 0; + } + // @LOCALMOD-END + OwningPtr<AssemblyAnnotationWriter> Annotator; if (ShowAnnotations) Annotator.reset(new CommentWriter()); diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index ac82d98b3b..40fd51331e 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -20,6 +20,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/DataLayout.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" // @LOCALMOD #include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" @@ -47,6 +48,18 @@ Force("f", cl::desc("Enable binary output on terminals")); static cl::opt<bool> DeleteFn("delete", cl::desc("Delete specified Globals from Module")); +// @LOCALMOD-BEGIN +static cl::opt<unsigned> +Divisor("divisor", + cl::init(0), + cl::desc("select GV by position (pos % divisor = remainder ")); + +static cl::opt<unsigned> +Remainder("remainder", + cl::init(0), + cl::desc("select GV by position (pos % divisor = remainder ")); +// @LOCALMOD-END + // ExtractFuncs - The functions to extract from the module. static cl::list<std::string> ExtractFuncs("func", cl::desc("Specify function to extract"), @@ -178,6 +191,24 @@ int main(int argc, char **argv) { } } + // @LOCALMOD-BEGIN + // Extract globals via modulo operation. 
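
The -divisor/-remainder pair introduced above selects globals (and, further down, functions) purely by their position in the module, which makes it usable as a crude bisection knob when hunting for a miscompiled symbol. A toy illustration of the selection rule, outside of LLVM, with invented names and positions:

#include <stdio.h>

int main() {
  // e.g. llvm-extract -divisor=2 -remainder=0 keeps the even-positioned globals.
  const unsigned Divisor = 2, Remainder = 0;
  const char *globals[] = { "g0", "g1", "g2", "g3", "g4" };
  for (unsigned pos = 0; pos < 5; ++pos)
    if (pos % Divisor == Remainder)
      printf("selected: %s (pos %u)\n", globals[pos], pos);
  return 0;
}

Doubling the divisor on each run (and varying the remainder) halves the selected set, which is the intended bisection workflow.
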
+ size_t count_globals = 0; + if (Divisor != 0) { + size_t pos = 0; + for (Module::global_iterator GV = M->global_begin(), E = M->global_end(); + GV != E; + GV++, pos++) { + if (pos % Divisor == Remainder) { + GVs.insert(&*GV); + } + } + dbgs() << "total globals: " << pos << "\n"; + count_globals = GVs.size(); + dbgs() << "selected globals: " << count_globals << "\n"; + } + // @LOCALMOD-END + // Figure out which functions we should extract. for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { GlobalValue *GV = M->getFunction(ExtractFuncs[i]); @@ -212,6 +243,22 @@ int main(int argc, char **argv) { } } + // @LOCALMOD-BEGIN + // Extract functions via modulo operation. + if (Divisor != 0) { + size_t pos = 0; + for (Module::iterator F = M->begin(), E = M->end(); + F != E; + F++, pos++) { + if (pos % Divisor == Remainder) { + GVs.insert(&*F); + } + } + dbgs() << "total functions: " << pos << "\n"; + dbgs() << "selected functions: " << GVs.size() - count_globals << "\n"; + } + // @LOCALMOD-END + // Materialize requisite global values. if (!DeleteFn) for (size_t i = 0, e = GVs.size(); i != e; ++i) { diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index b1c4f437ff..5d79fda5aa 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD #include "llvm/Config/config.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -71,6 +72,16 @@ LTOCodeGenerator::LTOCodeGenerator() InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmPrinters(); + + // @LOCALMOD-BEGIN + // Preserve symbols which may be referenced due to the lowering + // of an intrinsic. + const llvm::StringSet<> &IntrinsicSymbols = IntrinsicLowering::GetFuncNames(); + for (llvm::StringSet<>::const_iterator it = IntrinsicSymbols.begin(), + ie = IntrinsicSymbols.end(); it != ie; ++it) { + _mustPreserveSymbols[it->getKey().str().c_str()] = 1; + } + // @LOCALMOD-END } LTOCodeGenerator::~LTOCodeGenerator() { @@ -92,6 +103,68 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { return ret; } +// @LOCALMOD-BEGIN +/// Add a module that will be merged with the final output module. +/// The merging does not happen until linkGatheredModulesAndDispose(). +bool LTOCodeGenerator::gatherModuleForLinking(LTOModule* mod) { + _gatheredModules.push_back(mod); +} + +/// Merge all modules gathered from gatherModuleForLinking(), and +/// destroy the source modules in the process. +bool LTOCodeGenerator::linkGatheredModulesAndDispose(std::string& errMsg) { + + // We gather the asm undefs earlier than addModule() does, + // since we delete the modules during linking, and would not be + // able to do this after linking. The undefs vector contain lists + // of global variable names which are considered "used", which will be + // appended into the "llvm.compiler.used" list. The names must be the + // same before linking as they are after linking, since we have switched + // the order. + for (unsigned i = 0, ei = _gatheredModules.size(); i != ei; ++i) { + const std::vector<const char*> &undefs = + _gatheredModules[i]->getAsmUndefinedRefs(); + for (int j = 0, ej = undefs.size(); j != ej; ++j) { + _asmUndefinedRefs[undefs[j]] = 1; + } + } + + // Tree-reduce the mods, re-using the incoming mods as scratch + // intermediate results. 
Module i is linked with (i + stride), with i as + // the dest. We begin with a stride of 1, and double each time. E.g., + // after the first round, only the even-indexed modules are still available, + // and after the second, only those with index that are a multiple of 4 + // are available. Eventually the Module with the content of all other modules + // will be Module 0. + // NOTE: we may be able to be smarter about linking if we did not do them + // pairwise using Linker::LinkModules. We also disregard module sizes + // and try our best to keep the modules in order (linking adjacent modules). + for (unsigned stride = 1, len = _gatheredModules.size(); + stride < len; + stride *= 2) { + for (unsigned i = 0; i + stride < len; i = i + (stride * 2)) { + if (Linker::LinkModules(_gatheredModules[i]->getLLVVMModule(), + _gatheredModules[i+stride]->getLLVVMModule(), + Linker::DestroySource, &errMsg)) { + errs() << "LinkModules " << i << " w/ " << i + stride << " failed...\n"; + // We leak the memory in this case... + return true; + } + delete _gatheredModules[i+stride]; + } + } + + // Finally, link Node 0 with the Dest and delete Node 0. + if (_linker.LinkInModule(_gatheredModules[0]->getLLVVMModule(), &errMsg)) { + errs() << "LinkModules Dst w/ _gatheredModules[0] failed...\n"; + return true; + } + delete _gatheredModules[0]; + + return false; +} +// @LOCALMOD-END + bool LTOCodeGenerator::setDebugInfo(lto_debug_model debug, std::string& errMsg) { switch (debug) { @@ -118,6 +191,81 @@ bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model, llvm_unreachable("Unknown PIC model!"); } +// @LOCALMOD-BEGIN +void LTOCodeGenerator::setMergedModuleOutputFormat(lto_output_format format) +{ + Module::OutputFormat outputFormat; + switch (format) { + case LTO_OUTPUT_FORMAT_OBJECT: + outputFormat = Module::ObjectOutputFormat; + break; + case LTO_OUTPUT_FORMAT_SHARED: + outputFormat = Module::SharedOutputFormat; + break; + case LTO_OUTPUT_FORMAT_EXEC: + outputFormat = Module::ExecutableOutputFormat; + break; + } + Module *mergedModule = _linker.getModule(); + mergedModule->setOutputFormat(outputFormat); +} + +void LTOCodeGenerator::setMergedModuleSOName(const char *soname) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->setSOName(soname); +} + +void LTOCodeGenerator::addLibraryDep(const char *lib) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->addLibrary(lib); +} + +void LTOCodeGenerator::wrapSymbol(const char *sym) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->wrapSymbol(sym); +} + +const char* LTOCodeGenerator::setSymbolDefVersion(const char *sym, + const char *ver, + bool is_default) +{ + Module *mergedModule = _linker.getModule(); + GlobalValue *GV = mergedModule->getNamedValue(sym); + if (!GV) { + llvm_unreachable("Invalid global in setSymbolDefVersion"); + } + GV->setVersionDef(ver, is_default); + return strdup(GV->getName().str().c_str()); +} + +const char* LTOCodeGenerator::setSymbolNeeded(const char *sym, + const char *ver, + const char *dynfile) +{ + Module *mergedModule = _linker.getModule(); + GlobalValue *GV = mergedModule->getNamedValue(sym); + if (!GV) { + // Symbol lookup may have failed because this symbol was already + // renamed for versioning. Make sure this is the case. 
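
The pairwise reduction in linkGatheredModulesAndDispose() above is easier to see with a concrete module count. The following stand-alone simulation only prints which indices the loop would pair; the real code calls Linker::LinkModules with DestroySource at each step:

#include <stdio.h>

int main() {
  const unsigned len = 5;                       // pretend 5 modules were gathered
  for (unsigned stride = 1; stride < len; stride *= 2)
    for (unsigned i = 0; i + stride < len; i += stride * 2)
      printf("link module %u  <-  module %u\n", i, i + stride);
  printf("link dest <- module 0\n");
  return 0;
}

For five modules this prints 0<-1, 2<-3, then 0<-2, then 0<-4, and finally the merge into the destination module: everything funnels into module 0 before the single LinkInModule() call.
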
+ if (strchr(sym, '@') != NULL || ver == NULL || ver[0] == '\0') { + llvm_unreachable("Unexpected condition in setSymbolNeeded"); + } + std::string NewName = std::string(sym) + "@" + ver; + GV = mergedModule->getNamedValue(NewName); + } + if (!GV) { + // Ignore failures due to unused declarations. + // This caused a falure to build libppruntime.so for glibc. + // TODO(sehr): better document under which circumstances this is needed. + return sym; + } + GV->setNeeded(ver, dynfile); + return strdup(GV->getName().str().c_str()); +} +// @LOCALMOD-END bool LTOCodeGenerator::writeMergedModules(const char *path, std::string &errMsg) { if (determineTarget(errMsg)) diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h index 3081b7dad1..de3d1fa8a5 100644 --- a/tools/lto/LTOCodeGenerator.h +++ b/tools/lto/LTOCodeGenerator.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm-c/lto.h" #include <string> +#include <vector> namespace llvm { class LLVMContext; @@ -40,6 +41,12 @@ struct LTOCodeGenerator { ~LTOCodeGenerator(); bool addModule(struct LTOModule*, std::string &errMsg); + // @LOCALMOD-BEGIN + // Alternative methods of adding modules, which delay merging modules until + // all modules are available. + bool gatherModuleForLinking(struct LTOModule*); + bool linkGatheredModulesAndDispose(std::string &errMsg); + // @LOCALMOD-END bool setDebugInfo(lto_debug_model, std::string &errMsg); bool setCodePICModel(lto_codegen_model, std::string &errMsg); @@ -50,6 +57,16 @@ struct LTOCodeGenerator { } bool writeMergedModules(const char *path, std::string &errMsg); + // @LOCALMOD-BEGIN + void setMergedModuleOutputFormat(lto_output_format format); + void setMergedModuleSOName(const char *soname); + void addLibraryDep(const char *lib); + void wrapSymbol(const char *sym); + const char* setSymbolDefVersion(const char *sym, const char *ver, + bool is_default); + const char* setSymbolNeeded(const char *sym, const char *ver, + const char *dynfile); + // @LOCALMOD-END bool compile_to_file(const char **name, std::string &errMsg); const void *compile(size_t *length, std::string &errMsg); void setCodeGenDebugOptions(const char *opts); @@ -77,6 +94,9 @@ private: std::vector<char*> _codegenOptions; std::string _mCpu; std::string _nativeObjectPath; + + // @LOCALMOD + std::vector<LTOModule*> _gatheredModules; }; #endif // LTO_CODE_GENERATOR_H diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index ffdcbe644c..cb8a4e5f0d 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -17,6 +17,8 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD + #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -27,6 +29,7 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" // @LOCALMOD #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -271,7 +274,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, } // parse bitcode buffer - OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext(), + OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), // @LOCALMOD &errMsg)); if (!m) { delete buffer; @@ -304,6 +307,13 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, getTargetOptions(Options); TargetMachine *target = 
march->createTargetMachine(TripleStr, CPU, FeatureStr, Options); + + // @LOCALMOD-BEGIN + // Add declarations for functions which may be used by intrinsics. + IntrinsicLowering IL(*target->getDataLayout()); + IL.AddPrototypes(*m); + // @LOCALMOD-END + LTOModule *Ret = new LTOModule(m.take(), target); if (Ret->parseSymbols(errMsg)) { delete Ret; @@ -319,6 +329,33 @@ MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) { return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false); } +// @LOCALMOD-BEGIN +lto_output_format LTOModule::getOutputFormat() { + Module::OutputFormat format = _module->getOutputFormat(); + switch (format) { + case Module::ObjectOutputFormat: return LTO_OUTPUT_FORMAT_OBJECT; + case Module::SharedOutputFormat: return LTO_OUTPUT_FORMAT_SHARED; + case Module::ExecutableOutputFormat: return LTO_OUTPUT_FORMAT_EXEC; + } + llvm_unreachable("Unknown output format in LTOModule"); +} + +const char *LTOModule::getSOName() { + return _module->getSOName().c_str(); +} + +const char* LTOModule::getLibraryDep(uint32_t index) { + const Module::LibraryListType &Libs = _module->getLibraries(); + if (index < Libs.size()) + return Libs[index].c_str(); + return NULL; +} + +uint32_t LTOModule::getNumLibraryDeps() { + return _module->getLibraries().size(); +} +// @LOCALMOD-END + /// objcClassNameFromExpression - Get string that the data pointer points to. bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) { if (ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) { @@ -612,6 +649,16 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, bool isFunc) { if (decl->getName().startswith("llvm.")) return; + // @LOCALMOD-BEGIN + // Bitcode modules may have declarations for functions or globals + // which are unused. Ignore them here so that gold does not mistake + // them for undefined symbols. But don't ignore declarations for + // functions which are potentially used by intrinsics. + if (decl->use_empty() && + !IntrinsicLowering::IsCalledByIntrinsic(decl->getName())) + return; + // @LOCALMOD-END + // ignore all aliases if (isa<GlobalAlias>(decl)) return; @@ -788,6 +835,12 @@ namespace { unsigned MaxBytesToEmit) {} virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value ) { return false; } + // @LOCALMOD-BEGIN + virtual void EmitBundleLock() {} + virtual void EmitBundleUnlock() {} + virtual void EmitBundleAlignStart() {} + virtual void EmitBundleAlignEnd() {} + // @LOCALMOD-END virtual void EmitFileDirective(StringRef Filename) {} virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h index 8e52206b5b..03c16d08db 100644 --- a/tools/lto/LTOModule.h +++ b/tools/lto/LTOModule.h @@ -99,6 +99,14 @@ public: _module->setTargetTriple(triple); } + // @LOCALMOD-BEGIN + lto_output_format getOutputFormat(); + const char* getSOName(); + const char* getLibraryDep(uint32_t index); + uint32_t getNumLibraryDeps(); + // @LOCALMOD-END + + /// getSymbolCount - Get the number of symbols uint32_t getSymbolCount() { return _symbols.size(); diff --git a/tools/lto/Makefile b/tools/lto/Makefile index 3610fed03b..f9392a6911 100644 --- a/tools/lto/Makefile +++ b/tools/lto/Makefile @@ -57,3 +57,11 @@ ifeq ($(HOST_OS),Darwin) -Wl,-object_path_lto -Wl,$(TempFile) endif endif + +#@ LOCALMOD-BEGIN +# This is to fix an upstream bug. It is in the process of being upstreamed. +# This line can be removed after it has been fixed upstream and we've merged. 
+ifneq ($(HOST_OS),Darwin) + LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-soname=$(SharedPrefix)LTO$(SHLIBEXT) +endif +#@ LOCALMOD-END diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index a7e633d14b..a7c335c934 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -15,6 +15,8 @@ #include "llvm-c/lto.h" #include "llvm-c/Core.h" +#include "llvm/Support/CommandLine.h" // @LOCALMOD + #include "LTOModule.h" #include "LTOCodeGenerator.h" @@ -23,6 +25,25 @@ // *** Not thread safe *** static std::string sLastErrorString; +// @LOCALMOD-BEGIN +static std::vector<const char*> lto_options; +extern void lto_add_command_line_option(const char* opt) +{ + // ParseCommandLineOptions() expects argv[0] to be program name. + if (lto_options.empty()) + lto_options.push_back("libLTO"); + + lto_options.push_back(strdup(opt)); +} + +extern void lto_parse_command_line_options() +{ + if ( !lto_options.empty() ) + llvm::cl::ParseCommandLineOptions(lto_options.size(), + const_cast<char **>(<o_options[0])); +} +// @LOCALMOD-END + /// lto_get_version - Returns a printable string. extern const char* lto_get_version() { return LTOCodeGenerator::getVersionString(); @@ -107,6 +128,45 @@ void lto_module_set_target_triple(lto_module_t mod, const char *triple) { return mod->setTargetTriple(triple); } +// @LOCALMOD-BEGIN + +// +// Get the module format for this module +// +lto_output_format lto_module_get_output_format(lto_module_t mod) +{ + return mod->getOutputFormat(); +} + +// +// Get the module soname +// +const char* lto_module_get_soname(lto_module_t mod) +{ + return mod->getSOName(); +} + +// +// Get the i'th library dependency. +// Returns NULL if i >= lto_module_get_num_library_deps() +// +const char * +lto_module_get_library_dep(lto_module_t mod, unsigned int i) +{ + return mod->getLibraryDep(i); +} + +// +// Return the number of library dependencies of this module. +// +unsigned int +lto_module_get_num_library_deps(lto_module_t mod) +{ + return mod->getNumLibraryDeps(); +} + +// @LOCALMOD-END + /// lto_module_get_num_symbols - Returns the number of symbols in the object /// module. unsigned int lto_module_get_num_symbols(lto_module_t mod) { @@ -145,6 +205,16 @@ bool lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod) { return cg->addModule(mod, sLastErrorString); } +// @LOCALMOD-BEGIN +bool lto_codegen_gather_module_for_link(lto_code_gen_t cg, lto_module_t mod) { + return cg->gatherModuleForLinking(mod); +} + +bool lto_codegen_link_gathered_modules_and_dispose(lto_code_gen_t cg) { + return cg->linkGatheredModulesAndDispose(sLastErrorString); +} +// @LOCALMOD-END + /// lto_codegen_set_debug_model - Sets what if any format of debug info should /// be generated. Returns true on error (check lto_get_error_message() for /// details). @@ -183,6 +253,77 @@ void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, cg->addMustPreserveSymbol(symbol); } +// @LOCALMOD-BEGIN + +// +// Set the module format for the merged module +// +void lto_codegen_set_merged_module_output_format(lto_code_gen_t cg, + lto_output_format format) +{ + cg->setMergedModuleOutputFormat(format); +} + +// +// Set the module soname (for shared library bitcode) +// +void lto_codegen_set_merged_module_soname(lto_code_gen_t cg, + const char* soname) +{ + cg->setMergedModuleSOName(soname); +} + +// +// Add a library dependency to the linked bitcode module. 
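
To show how a client such as a linker driver or plugin would consume the metadata getters added to the libLTO C API in this patch, here is a hedged sketch. It assumes the @LOCALMOD declarations for lto_output_format and the new getters are visible through llvm-c/lto.h, and it omits most error handling:

#include <stdio.h>
#include "llvm-c/lto.h"

int main(int argc, char **argv) {
  if (argc != 2) {
    fprintf(stderr, "usage: %s <bitcode file>\n", argv[0]);
    return 1;
  }
  lto_module_t mod = lto_module_create(argv[1]);
  if (!mod) {
    fprintf(stderr, "load failed: %s\n", lto_get_error_message());
    return 1;
  }
  if (lto_module_get_output_format(mod) == LTO_OUTPUT_FORMAT_SHARED)
    printf("soname: %s\n", lto_module_get_soname(mod));
  unsigned n = lto_module_get_num_library_deps(mod);
  for (unsigned i = 0; i < n; ++i)
    printf("needs: %s\n", lto_module_get_library_dep(mod, i));
  lto_module_dispose(mod);
  return 0;
}
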
+// +void lto_codegen_add_merged_module_library_dep(lto_code_gen_t cg, + const char* soname) +{ + cg->addLibraryDep(soname); +} + +// +// Apply symbol wrapping in the linked bitcode module. +// +void lto_codegen_wrap_symbol_in_merged_module(lto_code_gen_t cg, + const char* sym) { + cg->wrapSymbol(sym); +} + +// +// Set the symbol version of defined symbol 'sym'. +// 'sym' is the name of the GlobalValue, exactly as it is +// in the LLVM module. It may already have a version suffix. +// In that case, this function verifies that the old version +// and new version match. +// Returns a reference to the new name. +// +const char * +lto_codegen_set_symbol_def_version(lto_code_gen_t cg, + const char *sym, + const char *version, + bool is_default) { + return cg->setSymbolDefVersion(sym, version, is_default); +} + +// +// Set the symbol version of needed symbol 'sym' from file 'dynfile'. +// 'sym' is the name of the GlobalValue, exactly as it is +// in the LLVM module. It may already have a version suffix. +// In that case, this function verifies that the old version +// and new version match. +// In any case, it adds a NeededRecord entry. +// Returns a reference to the new name. +// +const char* +lto_codegen_set_symbol_needed(lto_code_gen_t cg, + const char *sym, + const char *version, + const char *dynfile) { + return cg->setSymbolNeeded(sym, version, dynfile); +} +// @LOCALMOD-END + /// lto_codegen_write_merged_modules - Writes a new file at the specified path /// that contains the merged contents of all modules added so far. Returns true /// on error (check lto_get_error_message() for details). diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports index 4940bb147e..e589c5d2c6 100644 --- a/tools/lto/lto.exports +++ b/tools/lto/lto.exports @@ -1,3 +1,5 @@ +lto_add_command_line_option +lto_parse_command_line_options lto_get_error_message lto_get_version lto_module_create @@ -9,16 +11,25 @@ lto_module_get_symbol_attribute lto_module_get_symbol_name lto_module_get_target_triple lto_module_set_target_triple +lto_module_get_output_format +lto_module_get_soname +lto_module_get_library_dep +lto_module_get_num_library_deps lto_module_is_object_file lto_module_is_object_file_for_target lto_module_is_object_file_in_memory lto_module_is_object_file_in_memory_for_target lto_module_dispose lto_codegen_add_module +lto_codegen_gather_module_for_link +lto_codegen_link_gathered_modules_and_dispose lto_codegen_add_must_preserve_symbol lto_codegen_compile lto_codegen_create lto_codegen_dispose +lto_codegen_set_assembler_args +lto_codegen_set_assembler_path +lto_codegen_set_cpu lto_codegen_set_debug_model lto_codegen_set_pic_model lto_codegen_write_merged_modules @@ -26,6 +37,12 @@ lto_codegen_debug_options lto_codegen_set_assembler_args lto_codegen_set_assembler_path lto_codegen_set_cpu +lto_codegen_set_merged_module_output_format +lto_codegen_set_merged_module_soname +lto_codegen_add_merged_module_library_dep +lto_codegen_set_symbol_def_version +lto_codegen_set_symbol_needed +lto_codegen_wrap_symbol_in_merged_module lto_codegen_compile_to_file LLVMCreateDisasm LLVMDisasmDispose diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index bac0d46947..0390bc470a 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -580,6 +580,7 @@ int main(int argc, char **argv) { initializeInstCombine(Registry); initializeInstrumentation(Registry); initializeTarget(Registry); + initializeExpandCtorsPass(Registry); cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .bc modular optimizer and analysis printer\n"); diff --git 
a/tools/pso-stub/CMakeLists.txt b/tools/pso-stub/CMakeLists.txt new file mode 100644 index 0000000000..4b2f779cb0 --- /dev/null +++ b/tools/pso-stub/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitreader bitwriter object support analysis) + +add_llvm_tool(pso-stub + pso-stub.cpp + ) diff --git a/tools/pso-stub/LLVMBuild.txt b/tools/pso-stub/LLVMBuild.txt new file mode 100644 index 0000000000..e643053dbf --- /dev/null +++ b/tools/pso-stub/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/pso-stub/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pso-stub +parent = Tools +required_libraries = BitReader BitWriter Object Support Analysis diff --git a/tools/pso-stub/Makefile b/tools/pso-stub/Makefile new file mode 100644 index 0000000000..c2860e65f6 --- /dev/null +++ b/tools/pso-stub/Makefile @@ -0,0 +1,18 @@ +##===- tools/pso-stub/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pso-stub +LINK_COMPONENTS := bitreader bitwriter object support analysis + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 + +include $(LEVEL)/Makefile.common + diff --git a/tools/pso-stub/pso-stub.cpp b/tools/pso-stub/pso-stub.cpp new file mode 100644 index 0000000000..1fdc868499 --- /dev/null +++ b/tools/pso-stub/pso-stub.cpp @@ -0,0 +1,309 @@ +/*===- pso-stub.c - Create bitcode shared object stubs -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Create a bitcode stub for a native shared object. +// Usage: pso-stub <input.so> -o <output.pso> +// +// The stub bitcode file contains the same dynamic symbols as the input shared +// object, with identical attributes (e.g. weak, undefined, TLS). +// +// Undefined functions become declarations in the bitcode. +// Undefined variables become external variable declarations in the bitcode. +// Defined functions become trivial stub functions in the bitcode (which do +// nothing but "ret void"). +// Defined object/tls symbols became dummy variable definitions (int foo = 0). +// +// The generated bitcode is suitable for linking against (as a shared object), +// but nothing else. +// +// TODO(pdox): Implement GNU symbol versioning. +// TODO(pdox): Mark IFUNC symbols as functions, and store +// this attribute as metadata. 
+//===----------------------------------------------------------------------===*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/GlobalValue.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Constant.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/ELF.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/APInt.h" + +using namespace llvm; +using namespace llvm::object; + +namespace { + +cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input native shared object>"), + cl::init("")); + +cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); + +// Variables / declarations to place in llvm.used array. +std::vector<GlobalValue*> LLVMUsed; + +void AddUsedGlobal(GlobalValue *GV) { + // Clang normally asserts that these are not decls. We do need + // decls to survive though, and those are really the ones we + // worry about, so only add those. + // We run verifyModule() below, so that we know this is somewhat valid. + if (GV->isDeclaration()) { + LLVMUsed.push_back(GV); + } +} + +// Emit llvm.used array. +// This is almost exactly like clang/lib/CodeGen/CodeGenModule.cpp::EmitLLVMUsed +void EmitLLVMUsed(Module *M) { + // Don't create llvm.used if there is no need. + if (LLVMUsed.empty()) + return; + + Type *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + // Convert LLVMUsed to what ConstantArray needs. + SmallVector<llvm::Constant*, 8> UsedArray; + UsedArray.resize(LLVMUsed.size()); + for (unsigned i = 0, e = LLVMUsed.size(); i != e; ++i) { + UsedArray[i] = + llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*LLVMUsed[i]), + Int8PtrTy); + } + + if (UsedArray.empty()) + return; + llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); + + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(*M, ATy, false, + llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), + "llvm.used"); + + GV->setSection("llvm.metadata"); +} + +// Add a stub function definition or declaration +void +AddFunction(Module *M, + GlobalValue::LinkageTypes Linkage, + const StringRef &Name, + bool isDefine) { + // Create an empty function with no arguments. + // void Name(void); + Type *RetTy = Type::getVoidTy(M->getContext()); + FunctionType *FT = FunctionType::get(RetTy, /*isVarArg=*/ false); + Function *F = Function::Create(FT, Linkage, Name, M); + if (isDefine) { + // Add a single basic block with "ret void" + BasicBlock *BB = BasicBlock::Create(F->getContext(), "", F); + BB->getInstList().push_back(ReturnInst::Create(F->getContext())); + } + AddUsedGlobal(F); +} + +// Add a stub global variable declaration or definition. +void +AddGlobalVariable(Module *M, + GlobalValue::LinkageTypes Linkage, + const StringRef &Name, + bool isTLS, + bool isDefine) { + // Use 'int' as the dummy type. + Type *Ty = Type::getInt32Ty(M->getContext()); + + Constant *InitVal = NULL; + if (isDefine) { + // Define to dummy value, 0. 
+ InitVal = Constant::getIntegerValue(Ty, APInt(32, 0)); + } + GlobalVariable *GV = + new GlobalVariable(*M, Ty, /*isConstant=*/ false, + Linkage, /*Initializer=*/ InitVal, + Twine(Name), /*InsertBefore=*/ NULL, + isTLS ? GlobalVariable::GeneralDynamicTLSModel : + GlobalVariable::NotThreadLocal, + /*AddressSpace=*/ 0); + AddUsedGlobal(GV); +} + +// Iterate through the ObjectFile's needed libraries, and +// add them to the module. +void TransferLibrariesNeeded(Module *M, const ObjectFile *obj) { + library_iterator it = obj->begin_libraries_needed(); + library_iterator ie = obj->end_libraries_needed(); + error_code ec; + for (; it != ie; it.increment(ec)) { + StringRef path; + it->getPath(path); + outs() << "Adding library " << path << "\n"; + M->addLibrary(path); + } +} + +// Set the Module's SONAME from the ObjectFile +void TransferLibraryName(Module *M, const ObjectFile *obj) { + StringRef soname = obj->getLoadName(); + outs() << "Setting soname to: " << soname << "\n"; + M->setSOName(soname); +} + +// Create stubs in the module for the dynamic symbols +void TransferDynamicSymbols(Module *M, const ObjectFile *obj) { + // Iterate through the dynamic symbols in the ObjectFile. + symbol_iterator it = obj->begin_dynamic_symbols(); + symbol_iterator ie = obj->end_dynamic_symbols(); + error_code ec; + for (; it != ie; it.increment(ec)) { + const SymbolRef &sym = *it; + StringRef Name; + SymbolRef::Type Type; + uint32_t Flags; + + sym.getName(Name); + sym.getType(Type); + sym.getFlags(Flags); + + // Ignore debug info and section labels + if (Flags & SymbolRef::SF_FormatSpecific) + continue; + + // Ignore local symbols + if (!(Flags & SymbolRef::SF_Global)) + continue; + outs() << "Transferring symbol " << Name << "\n"; + + bool isFunc = (Type == SymbolRef::ST_Function); + bool isUndef = (Flags & SymbolRef::SF_Undefined); + bool isTLS = (Flags & SymbolRef::SF_ThreadLocal); + bool isCommon = (Flags & SymbolRef::SF_Common); + bool isWeak = (Flags & SymbolRef::SF_Weak); + + if (Type == SymbolRef::ST_Unknown) { + // Weak symbols can be "v" according to NM, which are definitely + // data, but they may also be "w", which are of unknown type. + // Thus there is already a mechanism to say "weak object", but not + // for weak function. Assume unknown weak symbols are functions. + if (isWeak) { + outs() << "Warning: Symbol '" << Name << + "' has unknown type (weak). Assuming function.\n"; + Type = SymbolRef::ST_Function; + isFunc = true; + } else { + // If it is undef, we likely don't care, since it won't be used + // to bind to unresolved symbols in the real pexe and real pso. + // Other cases seen where it is not undef: _end, __bss_start, + // which are markers provided by the linker scripts. + outs() << "Warning: Symbol '" << Name << + "' has unknown type (isUndef=" << isUndef << "). Assuming data.\n"; + Type = SymbolRef::ST_Data; + isFunc = false; + } + } + + // Determine Linkage type. + GlobalValue::LinkageTypes Linkage; + if (isWeak) + Linkage = isUndef ? GlobalValue::ExternalWeakLinkage : + GlobalValue::WeakAnyLinkage; + else if (isCommon) + Linkage = GlobalValue::CommonLinkage; + else + Linkage = GlobalValue::ExternalLinkage; + + if (isFunc) + AddFunction(M, Linkage, Name, !isUndef); + else + AddGlobalVariable(M, Linkage, Name, isTLS, !isUndef); + } +} + +} // namespace + + +int main(int argc, const char** argv) { + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + LLVMContext &Context = getGlobalContext(); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
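
The linkage selection inside TransferDynamicSymbols() above reduces to a small decision table. Restated as a free function for readability (a paraphrase of the logic in the patch, not code from the tree):

#include "llvm/GlobalValue.h"

// Weak + undefined  -> extern_weak declaration
// Weak + defined    -> weak definition
// Common            -> common definition
// Everything else   -> plain external linkage
static llvm::GlobalValue::LinkageTypes
ChooseLinkage(bool isWeak, bool isUndef, bool isCommon) {
  if (isWeak)
    return isUndef ? llvm::GlobalValue::ExternalWeakLinkage
                   : llvm::GlobalValue::WeakAnyLinkage;
  if (isCommon)
    return llvm::GlobalValue::CommonLinkage;
  return llvm::GlobalValue::ExternalLinkage;
}
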
+ + cl::ParseCommandLineOptions(argc, argv, + "Portable Shared Object Stub Maker\n"); + + if (InputFilename.empty()) { + errs() << "Please specify an input filename\n"; + return 1; + } + if (OutputFilename.empty()) { + errs() << "Please specify an output filename with -o\n"; + return 1; + } + + // Open the object file + OwningPtr<MemoryBuffer> File; + if (MemoryBuffer::getFile(InputFilename, File)) { + errs() << InputFilename << ": Open failed\n"; + return 1; + } + + ObjectFile *obj = ObjectFile::createObjectFile(File.take()); + if (!obj) { + errs() << InputFilename << ": Object type not recognized\n"; + } + + // Create the new module + OwningPtr<Module> M(new Module(InputFilename, Context)); + + // Transfer the relevant ELF information + M->setOutputFormat(Module::SharedOutputFormat); + TransferLibrariesNeeded(M.get(), obj); + TransferLibraryName(M.get(), obj); + TransferDynamicSymbols(M.get(), obj); + EmitLLVMUsed(M.get()); + + // Verify the module + std::string Err; + if (verifyModule(*M.get(), ReturnStatusAction, &Err)) { + errs() << "Module created is invalid:\n"; + errs() << Err; + return 1; + } + + // Write the module to a file + std::string ErrorInfo; + OwningPtr<tool_output_file> Out( + new tool_output_file(OutputFilename.c_str(), ErrorInfo, + raw_fd_ostream::F_Binary)); + if (!ErrorInfo.empty()) { + errs() << ErrorInfo << '\n'; + return 1; + } + WriteBitcodeToFile(M.get(), Out->os()); + Out->keep(); + return 0; +} diff --git a/utils/Makefile b/utils/Makefile index 7a3c17d032..f972b6596f 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -11,6 +11,15 @@ LEVEL = .. PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \ count fpcmp llvm-lit not unittest yaml2obj +ifeq ($(NACL_SANDBOX),1) + # In sandboxed mode, just build the bare minimum + # Note: TableGen is usually built twice: + # * once with host compiler + # * also with the "given" compiler + # Here we just disable that second build + PARALLEL_DIRS := +endif + EXTRA_DIST := check-each-file codegen-diff countloc.sh \ DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \ getsrcs.sh llvmdo llvmgrep llvm-native-gcc \ diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index c9992eb392..bd55e697c5 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -310,6 +310,12 @@ void CodeGenTarget::ComputeInstrsByEnum() const { "BUNDLE", "LIFETIME_START", "LIFETIME_END", + // @LOCALMOD-BEGIN + "BUNDLE_ALIGN_START", + "BUNDLE_ALIGN_END", + "BUNDLE_LOCK", + "BUNDLE_UNLOCK", + // @LOCALMOD-END 0 }; const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions(); diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index ea2545050b..4101076f33 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -273,6 +273,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type, REG("RFP32"); REG("GR64"); REG("GR64_NOAX"); + REG("GR32_TC_64"); // @LOCALMOD REG("GR64_TC"); REG("FR64"); REG("VR64"); |
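
The BUNDLE_* pseudo-instructions registered above pair with the EmitBundleLock()/EmitBundleUnlock() hooks that this patch adds to MCStreamer (seen earlier in the LTOModule stub streamer). How each backend drives those hooks is outside this diff, so the following is only an illustrative shape, not code from the tree:

#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCInst.h"

// Keep an address-masking instruction and the jump it guards inside one
// NaCl bundle so the sandbox can never observe the unmasked target.
static void EmitSandboxedIndirectJump(llvm::MCStreamer &Out,
                                      const llvm::MCInst &Mask,
                                      const llvm::MCInst &Jump) {
  Out.EmitBundleLock();      // @LOCALMOD hook: no bundle boundary may intervene
  Out.EmitInstruction(Mask);
  Out.EmitInstruction(Jump);
  Out.EmitBundleUnlock();
}
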