author     Derek Schuff <dschuff@chromium.org>   2012-07-09 10:52:46 -0700
committer  Derek Schuff <dschuff@chromium.org>   2012-07-09 11:00:37 -0700
commit     5dbcc7e0c9c12f4a4042fb4a226654aee927999c
tree       b316a3370e9286cb4e6f81b2f9d8bd8b54ce5123
parent     86dc97be9ac3b4804528e087b04b4f4192cdee54
LOCALMODs from hg 0b098ca44de7 against r158408 (hg 90a87d6bfe45)
(only non-new files; new files in git 4f429c8b)
Change-Id: Ia39f818088485bd90e4d048db404f8d6ba5f836b
181 files changed, 5763 insertions, 344 deletions
@@ -43,6 +43,11 @@ EXTRA_DIST := test unittests llvm.spec include win32 Xcode
 include $(LEVEL)/Makefile.config
 
+ifeq ($(NACL_SANDBOX),1)
+  DIRS := $(filter-out tools/llvm-shlib runtime docs unittests, $(DIRS))
+  OPTIONAL_DIRS :=
+endif
+
 ifneq ($(ENABLE_SHARED),1)
   DIRS := $(filter-out tools/llvm-shlib, $(DIRS))
 endif
@@ -119,6 +124,7 @@ cross-compile-build-tools:
 	(unset SDKROOT; \
 	 $(MAKE) -C BuildTools \
 	  BUILD_DIRS_ONLY=1 \
+	  NACL_SANDBOX=0 \
 	  UNIVERSAL= \
 	  TARGET_NATIVE_ARCH="$(TARGET_NATIVE_ARCH)" \
 	  TARGETS_TO_BUILD="$(TARGETS_TO_BUILD)" \
diff --git a/Makefile.rules b/Makefile.rules
index 4b065239bb..6ea6eb5938 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -638,6 +638,17 @@ endif
 endif
 endif
 
+ifeq ($(NACL_SANDBOX),1)
+  LIBS += -lsrpc -limc_syscalls -lplatform -lgio -lpthread -lm -lnacl -lnacl_dyncode -lnosys
+  ifeq ($(USE_TCMALLOC),1)
+    # Note: -ltcmalloc_minimal needs to stay last on the link line
+    LIBS += -ltcmalloc_minimal
+    CXX.Flags += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+    C.Flags += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free
+  endif
+else
+  LIBS +=
+endif
 
 #----------------------------------------------------------
 # Options To Invoke Tools
diff --git a/autoconf/config.sub b/autoconf/config.sub
index 9942491533..a4f411f6c6 100755
--- a/autoconf/config.sub
+++ b/autoconf/config.sub
@@ -239,6 +239,10 @@ case $os in
 		basic_machine=m68k-atari
 		os=-mint
 		;;
+	-nacl*)
+		basic_machine=i686-pc
+		os=-nacl
+		;;
 esac
 
 # Decode aliases for certain CPU-COMPANY combinations.
@@ -347,6 +351,14 @@ case $basic_machine in
 	i*86 | x86_64)
 		basic_machine=$basic_machine-pc
 		;;
+	nacl64*)
+		basic_machine=x86_64-pc
+		os=-nacl
+		;;
+	nacl*)
+		basic_machine=i686-pc
+		os=-nacl
+		;;
 	# Object if more than one company name word.
 	*-*-*)
 		echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
@@ -1364,6 +1376,9 @@ case $os in
 			;;
 		esac
 		;;
+	-nacl*)
+		os=-nacl
+		;;
 	-nto-qnx*)
 		;;
 	-nto*)
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index e751059a28..5e1b194e25 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -266,6 +266,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on],
     llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
     llvm_cv_os_type="Freestanding"
     llvm_cv_platform_type="Unix" ;;
+  *-*-nacl*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Freestanding"
+    llvm_cv_platform_type="Unix" ;;
   *)
     llvm_cv_link_all_option=""
     llvm_cv_no_link_all_option=""
@@ -3776,6 +3776,11 @@ else
     llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
     llvm_cv_os_type="Freestanding"
     llvm_cv_platform_type="Unix" ;;
+  *-*-nacl*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Freestanding"
+    llvm_cv_platform_type="Unix" ;;
   *)
     llvm_cv_link_all_option=""
     llvm_cv_no_link_all_option=""
diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index f43d365e3d..c6f4417e19 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -60,6 +60,13 @@ typedef enum {
     LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC = 2
 } lto_codegen_model;
 
+/* @LOCALMOD-BEGIN */
+typedef enum {
+    LTO_OUTPUT_FORMAT_OBJECT = 0,  /* object file */
+    LTO_OUTPUT_FORMAT_SHARED = 1,  /* shared library */
+    LTO_OUTPUT_FORMAT_EXEC   = 2   /* executable */
+} lto_output_format;
+/* @LOCALMOD-END */
 
 /** opaque reference to a loaded object module */
 typedef struct LTOModule*         lto_module_t;
@@ -71,6 +78,17 @@ typedef struct LTOCodeGenerator*  lto_code_gen_t;
 extern "C" {
 #endif
 
+/* @LOCALMOD-BEGIN */
+
+/* Add a command-line option */
+void lto_add_command_line_option(const char* opt);
+
+/* Parse command line options */
+void lto_parse_command_line_options();
+
+/* @LOCALMOD-END */
+
 /**
  * Returns a printable string.
 */
@@ -165,6 +183,36 @@ lto_module_get_target_triple(lto_module_t mod);
 extern void
 lto_module_set_target_triple(lto_module_t mod, const char *triple);
 
+/* @LOCALMOD-BEGIN */
+
+/**
+ * Get the module format for this module
+ */
+extern lto_output_format
+lto_module_get_output_format(lto_module_t mod);
+
+/**
+ * Get the module soname
+ */
+extern const char*
+lto_module_get_soname(lto_module_t mod);
+
+
+/**
+ * Get the i'th library dependency.
+ * Returns NULL if i >= lto_module_get_num_library_deps()
+ */
+extern const char*
+lto_module_get_library_dep(lto_module_t mod, unsigned int i);
+
+
+/**
+ * Return the number of library dependencies of this module.
+ */
+extern unsigned int
+lto_module_get_num_library_deps(lto_module_t mod);
+
+/* @LOCALMOD-END */
 
 /**
  * Returns the number of symbols in the object module.
@@ -258,6 +306,56 @@ lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args,
 extern void
 lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol);
 
+/* @LOCALMOD-BEGIN */
+
+/**
+ * Sets the module type for the merged module
+ */
+extern void
+lto_codegen_set_merged_module_output_format(lto_code_gen_t cg,
+                                            lto_output_format format);
+
+/**
+ * Sets the SOName for the merged module
+ */
+extern void
+lto_codegen_set_merged_module_soname(lto_code_gen_t cg,
+                                     const char *soname);
+
+/**
+ * Add a library dependency to the merged module
+ */
+extern void
+lto_codegen_add_merged_module_library_dep(lto_code_gen_t cg,
+                                          const char *lib);
+
+/**
+ * Wrap a symbol in the merged module.
+ */
+extern void
+lto_codegen_wrap_symbol_in_merged_module(lto_code_gen_t cg,
+                                         const char *sym);
+
+
+/**
+ * Set version of a defined symbol in the merged module
+ */
+extern const char *
+lto_codegen_set_symbol_def_version(lto_code_gen_t cg,
+                                   const char *sym,
+                                   const char *version,
+                                   bool is_default);
+
+
+/**
+ * Set version of an undefined symbol in the merged module
+ */
+extern const char *
+lto_codegen_set_symbol_needed(lto_code_gen_t cg,
+                              const char *sym,
+                              const char *version,
+                              const char *dynfile);
+/* @LOCALMOD-END */
 
 /**
  * Writes a new object file at the specified path that contains the
 * merged contents of all modules added so far.
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 56a87f139a..2e0184a61d 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -93,6 +93,12 @@ namespace llvm {
     /// default, this is equal to CurrentFnSym.
     MCSymbol *CurrentFnSymForSize;
 
+    /// @LOCALMOD-BEGIN
+    /// Is the bitcode module a plain object? This is false
+    /// for shared (pso) and executable (pexe) files.
+    bool IsPlainObject;
+    /// @LOCALMOD-END
+
   private:
     // GCMetadataPrinters - The garbage collection metadata printer table.
     void *GCMetadataPrinters;  // Really a DenseMap.
@@ -239,6 +245,18 @@ namespace llvm {
     // Targets can, or in the case of EmitInstruction, must implement these to
     // customize output.
 
+    // @LOCALMOD-START
+    /// UseReadOnlyJumpTables - true if JumpTableInfo must be in rodata.
+    virtual bool UseReadOnlyJumpTables() const { return false; }
+    /// GetTargetBasicBlockAlign - the target alignment for basic blocks.
+    virtual unsigned GetTargetBasicBlockAlign() const { return 0; }
+    /// GetTargetLabelAlign - Get optional alignment for TargetOpcode
+    /// labels E.g., EH_LABEL.
+    virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const {
+      return 0;
+    }
+    // @LOCALMOD-END
+
     /// EmitStartOfAsmFile - This virtual method can be overridden by targets
     /// that want to emit something at the start of their file.
     virtual void EmitStartOfAsmFile(Module &) {}
@@ -253,7 +271,12 @@ namespace llvm {
 
     /// EmitFunctionBodyEnd - Targets can override this to emit stuff after
     /// the last basic block in the function.
-    virtual void EmitFunctionBodyEnd() {}
+    virtual void EmitFunctionBodyEnd() {
+      // @LOCALMOD-START
+      unsigned NextFunctionAlignment = GetTargetBasicBlockAlign();
+      if (NextFunctionAlignment) EmitAlignment(NextFunctionAlignment);
+      // @LOCALMOD-END
+    }
 
     /// EmitInstruction - Targets should implement this to emit instructions.
     virtual void EmitInstruction(const MachineInstr *) {
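The lto.h LOCALMODs above give a gold-plugin-style client a C API for carrying PNaCl module metadata (output format, soname, library dependencies) through LTO. As a rough illustration only -- not part of this patch, and the helper name and flow are assumptions -- a plugin-side caller might combine the stock lto C API with the new entry points like this:

    // Hypothetical sketch: produce a shared (.pso) merged module, forwarding
    // the metadata tracked by the LOCALMOD accessors. Error handling omitted.
    #include "llvm-c/lto.h"

    static void emit_pso(const char *bitcode_path, const char *out_path) {
      lto_module_t mod = lto_module_create(bitcode_path);
      lto_code_gen_t cg = lto_codegen_create();
      lto_codegen_add_module(cg, mod);

      // Propagate the module-level metadata added by the LOCALMODs.
      lto_codegen_set_merged_module_output_format(cg, LTO_OUTPUT_FORMAT_SHARED);
      lto_codegen_set_merged_module_soname(cg, lto_module_get_soname(mod));
      for (unsigned int i = 0; i < lto_module_get_num_library_deps(mod); ++i)
        lto_codegen_add_merged_module_library_dep(
            cg, lto_module_get_library_dep(mod, i));

      lto_codegen_write_merged_modules(cg, out_path);
      lto_codegen_dispose(cg);
    }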
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index e380650eea..4d093c4cff 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -631,6 +631,18 @@ namespace ISD {
     ATOMIC_LOAD_UMIN,
     ATOMIC_LOAD_UMAX,
 
+    // @LOCALMOD-BEGIN
+    // NACL_* - Native Client intrinsics.
+    // These correspond to functions in:
+    //   native_client/src/untrusted/nacl/tls_params.h
+    NACL_THREAD_STACK_PADDING,
+    NACL_TP_ALIGN,
+    NACL_TP_TLS_OFFSET,
+    NACL_TP_TDB_OFFSET,
+    // Expands to the target architecture enumeration value.
+    NACL_TARGET_ARCH,
+    // @LOCALMOD-END
+
     /// BUILTIN_OP_END - This must be the last enum value in this list.
     /// The target-specific pre-isel opcode values start here.
     BUILTIN_OP_END
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index 767b666225..c5ffc7ec0e 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_CODEGEN_INTRINSICLOWERING_H
 #define LLVM_CODEGEN_INTRINSICLOWERING_H
 
+#include "llvm/ADT/StringSet.h"  // @LOCALMOD
 #include "llvm/Intrinsics.h"
 
 namespace llvm {
@@ -26,12 +27,23 @@ namespace llvm {
   class IntrinsicLowering {
     const TargetData& TD;
-
+    static StringSet<> FuncNames;  // @LOCALMOD
     bool Warned;
   public:
     explicit IntrinsicLowering(const TargetData &td) :
       TD(td), Warned(false) {}
 
+    /// @LOCALMOD-BEGIN
+    /// GetFuncNames - Get the names of all functions which may
+    /// be called by an intrinsic.
+    static const StringSet<> &GetFuncNames();
+
+    /// IsCalledByIntrinsic - Returns true if a function may be called
+    /// by an intrinsic.
+    static bool IsCalledByIntrinsic(const StringRef &FuncName);
+    /// @LOCALMOD-END
+
     /// AddPrototypes - This method, if called, causes all of the prototypes
     /// that might be needed by an intrinsic lowering implementation to be
     /// inserted into the module specified.
diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h
index 89f00e91f7..f95b8b6b84 100644
--- a/include/llvm/CodeGen/JITCodeEmitter.h
+++ b/include/llvm/CodeGen/JITCodeEmitter.h
@@ -290,7 +290,7 @@ public:
 
   /// getCurrentPCOffset - Return the offset from the start of the emitted
   /// buffer that we are currently writing to.
-  uintptr_t getCurrentPCOffset() const {
+  virtual uintptr_t getCurrentPCOffset() const {  // @LOCALMOD
     return CurBufferPtr-BufferBegin;
   }
 
@@ -335,6 +335,13 @@ public:
   /// getLabelLocations - Return the label locations map of the label IDs to
   /// their address.
   virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { return 0; }
+
+  // @LOCALMOD-START
+  virtual void beginBundleLock() {};
+  virtual void endBundleLock() {};
+  virtual void alignToBundleBeginning() {};
+  virtual void alignToBundleEnd() {};
+  // @LOCALMOD-END
 };
 
 } // End llvm namespace
diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h
index eb01f66c31..5be102f0b3 100644
--- a/include/llvm/CodeGen/LexicalScopes.h
+++ b/include/llvm/CodeGen/LexicalScopes.h
@@ -159,6 +159,12 @@ public:
   LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A)
     : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A),
       LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0), IndentLevel(0) {
+    // @LOCALMOD-BEGIN -- Hack for bug
+    // http://code.google.com/p/nativeclient/issues/detail?id=2786
+    Desc.make_weak();
+    InlinedAtLocation.make_weak();
+    // @LOCALMOD-END
+
     if (Parent)
       Parent->addChild(this);
   }
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index d6d65a24de..b0a70a872e 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -57,6 +57,17 @@ public:
 
   virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) = 0;
 
+  // @LOCALMOD-START
+  /// getJumpTableIndex - Check if this is a reference to a jump table.
+  /// If so, return a pointer to the jump table index value that is stored
+  /// in the constant pool, else return 0.
+  /// The default behavior is to indicate that the value is not a jump table
+  /// index. This is used by BranchFolder::runOnMachineFunction() and only in
+  /// conjunction with ARM targets.
+  /// TODO: this should be cleaned up as it does triple duty: tester, setter, getter
+  virtual unsigned *getJumpTableIndex() { return 0; }
+  // @LOCALMOD-END
+
   /// print - Implement operator<<
   virtual void print(raw_ostream &O) const = 0;
 };
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index 9192474b95..2a98c0043d 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -276,6 +276,21 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
   return BuildMI(BB, MII, DL, MCID);
 }
 
+// @LOCALMOD-BEGIN
+/// BuildMI - This version of the builder inserts the newly-built
+/// instruction before the given position in the given MachineBasicBlock,
+/// does NOT take a destination register, and does not add implicit operands.
+///
+inline MachineInstrBuilder BuildMI_NoImp(MachineBasicBlock &BB,
+                                         MachineBasicBlock::iterator I,
+                                         DebugLoc DL,
+                                         const MCInstrDesc &MCID) {
+  MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL, true);
+  BB.insert(I, MI);
+  return MachineInstrBuilder(MI);
+}
+// @LOCALMOD-END
+
 /// BuildMI - This version of the builder inserts the newly-built
 /// instruction at the end of the given MachineBasicBlock, and does NOT take a
 /// destination register.
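BuildMI_NoImp is what the bundling LOCALMODs use to plant operand-less pseudo-instructions. A hedged sketch -- the helper and pass context are assumptions, not from this patch -- of how a machine pass could wrap a single instruction in a locked bundle group using the BUNDLE_LOCK/BUNDLE_UNLOCK opcodes added elsewhere in this change:

    // Hypothetical helper: bracket MI with a bundle-locked group so the
    // assembler keeps it from straddling a bundle boundary.
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"

    using namespace llvm;

    static void bundleLockAround(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MI,
                                 const TargetInstrInfo *TII) {
      DebugLoc DL = MI->getDebugLoc();
      // Open the locked group immediately before MI; no implicit operands.
      BuildMI_NoImp(MBB, MI, DL, TII->get(TargetOpcode::BUNDLE_LOCK));
      // Close the group immediately after MI.
      MachineBasicBlock::iterator After = MI;
      ++After;
      BuildMI(MBB, After, DL, TII->get(TargetOpcode::BUNDLE_UNLOCK));
    }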
diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h
index 244b466e17..8d71930882 100644
--- a/include/llvm/CodeGen/MachineRelocation.h
+++ b/include/llvm/CodeGen/MachineRelocation.h
@@ -197,6 +197,14 @@ public:
     return Offset;
   }
 
+  // @LOCALMOD-START
+  /// setMachineCodeOffset() - Adjust the offset in the code buffer (this is
+  /// used when the instruction is moved after emission for bundle alignment)
+  void setMachineCodeOffset(intptr_t offset) {
+    Offset = offset;
+  }
+  // @LOCALMOD-END
+
   /// getRelocationType - Return the target-specific relocation ID for this
   /// relocation.
   unsigned getRelocationType() const {
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index 81a11a4c92..fbc2798684 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -77,6 +77,26 @@ public:
     removeDeadConstantUsers();   // remove any dead constants using this.
   }
 
+  // @LOCALMOD-BEGIN
+  /// Set the symbol version for this definition.
+  void setVersionDef(StringRef Version, bool IsDefault);
+
+  /// Set the symbol version and dynamic source file (soname)
+  /// for this externally provided global.
+  void setNeeded(StringRef Version, StringRef DynFile);
+
+  /// Get the name of this symbol without the version suffix.
+  StringRef getUnversionedName() const;
+
+  /// Get the version of this symbol.
+  /// Returns an empty string if the symbol is unversioned.
+  StringRef getVersion() const;
+
+  /// Returns true if this is the default version of the symbol.
+  /// This may only be called if the symbol is versioned.
+  bool isDefaultVersion() const;
+  // @LOCALMOD-END
+
   unsigned getAlignment() const {
     return (1u << Alignment) >> 1;
   }
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index e2be4c4f6a..c2e2065152 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -443,6 +443,37 @@ def int_convertus : Intrinsic<[llvm_anyint_ty],
 def int_convertuu : Intrinsic<[llvm_anyint_ty],
                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
 
+// @LOCALMOD-BEGIN
+//===----------------------- Native Client Intrinsics ---------------------===//
+// TODO(sehr): conditionalize this on IsNaCl64 | IsNaCl32 | IsNaClArm.
+// The expansions of these are in lib/Target/X86/X86InstrNacl.{td, cpp} and
+// lib/Target/ARM/ARMInstrInfo.td.
+def int_nacl_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_ptr_ty]>,
+                      GCCBuiltin<"__builtin_nacl_setjmp">;
+def int_nacl_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>,
+                       GCCBuiltin<"__builtin_nacl_longjmp">;
+
+// The following intrinsics provide target-specific implementations of
+// the interface in native_client/src/untrusted/nacl/tls_params.h.
+// The intrinsic names are basically the functions there without the
+// leading underscores.
+def int_nacl_tp_alignment : Intrinsic<[llvm_i32_ty], []>,
+                            GCCBuiltin<"__builtin_nacl_tp_alignment">;
+def int_nacl_tp_tls_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>,
+                             GCCBuiltin<"__builtin_nacl_tp_tls_offset">;
+def int_nacl_tp_tdb_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>,
+                             GCCBuiltin<"__builtin_nacl_tp_tdb_offset">;
+def int_nacl_thread_stack_padding :
+    Intrinsic<[llvm_i32_ty], []>,
+    GCCBuiltin<"__builtin_nacl_thread_stack_padding">;
+
+// The following intrinsic provides a target-specific constant value to
+// indicate the target platform compiled to. The enum values are enumerated
+// in pnaclintrin.h.
+def int_nacl_target_arch : Intrinsic<[llvm_i32_ty], []>,
+                           GCCBuiltin<"__builtin_nacl_target_arch">;
+// @LOCALMOD-END
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
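For reference, the GCCBuiltin bindings above surface in user code as __builtin_nacl_* calls. The following sketch would only compile with a NaCl toolchain, and the layout arithmetic is an assumption based on the tls_params.h interface, not something this patch specifies:

    // Hypothetical illustration of querying the target's TLS layout.
    #include <stddef.h>

    // Where does the TLS block live relative to the thread pointer tp?
    static char *tls_start_from_tp(char *tp, size_t tls_size) {
      return tp + __builtin_nacl_tp_tls_offset((int) tls_size);
    }

    // Where does the thread data block (TDB) live relative to tp?
    static char *tdb_from_tp(char *tp, size_t tdb_size) {
      return tp + __builtin_nacl_tp_tdb_offset((int) tdb_size);
    }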
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 05e6286b7c..56a489c9f2 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -25,6 +25,7 @@ class MCInst;
 class MCInstFragment;
 class MCObjectWriter;
 class MCSection;
+class MCStreamer;
 class MCValue;
 class raw_ostream;
 
@@ -143,6 +144,23 @@ public:
   /// handleAssemblerFlag - Handle any target-specific assembler flags.
   /// By default, do nothing.
   virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
+
+  // @LOCALMOD-BEGIN
+  /// getBundleSize - Return the size (in bytes) of code bundling units
+  /// for this target. If 0, bundling is disabled. This is used exclusively
+  /// for Native Client.
+  virtual unsigned getBundleSize() const {
+    return 0;
+  }
+
+  /// CustomExpandInst -
+  /// If the MCInst instruction has a custom expansion, write it to the
+  /// MCStreamer 'Out'. This can be used to perform "last minute" rewrites of
+  /// MCInst instructions for emission.
+  virtual bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const {
+    return false;
+  }
+  // @LOCALMOD-END
 };
 
 } // End llvm namespace
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index ae0dad2fd1..619b4939f2 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -48,6 +48,14 @@ namespace llvm {
     /// Default is 4.
     unsigned PointerSize;
 
+    /// @LOCALMOD-BEGIN
+    /// TODO(pdox): Before upstreaming this, make sure every target backend
+    /// sets it correctly.
+    /// StackSlotSize - Stack slot size in bytes.
+    /// Default is 4.
+    unsigned StackSlotSize;
+    /// @LOCALMOD-END
+
     /// IsLittleEndian - True if target is little endian.
     /// Default is true.
     bool IsLittleEndian;
@@ -348,6 +356,13 @@ namespace llvm {
       return PointerSize;
     }
 
+    /// @LOCALMOD-BEGIN
+    /// getStackSlotSize - Get the stack slot size in bytes.
+    unsigned getStackSlotSize() const {
+      return StackSlotSize;
+    }
+    /// @LOCALMOD-END
+
     /// islittleendian - True if the target is little endian.
     bool isLittleEndian() const {
       return IsLittleEndian;
diff --git a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h
index cf79216d07..fdded4ffa7 100644
--- a/include/llvm/MC/MCAsmLayout.h
+++ b/include/llvm/MC/MCAsmLayout.h
@@ -80,6 +80,11 @@ public:
   /// \brief Get the offset of the given fragment inside its containing section.
   uint64_t getFragmentOffset(const MCFragment *F) const;
 
+  // @LOCALMOD-BEGIN
+  /// \brief Get the bundle padding of the given fragment.
+  uint8_t getFragmentPadding(const MCFragment *F) const;
+  // @LOCALMOD-END
+
   /// @}
   /// @name Utility Functions
   /// @{
diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h
index 4ab7f91f72..b0b78e1c1c 100644
--- a/include/llvm/MC/MCAssembler.h
+++ b/include/llvm/MC/MCAssembler.h
@@ -52,9 +52,18 @@ public:
     FT_Org,
     FT_Dwarf,
     FT_DwarfFrame,
-    FT_LEB
+    FT_LEB,
+    FT_Tiny  // @LOCALMOD
   };
 
+  // @LOCALMOD-BEGIN
+  enum BundleAlignType {
+    BundleAlignNone  = 0,
+    BundleAlignStart = 1,
+    BundleAlignEnd   = 2
+  };
+  // @LOCALMOD-END
+
 private:
   FragmentType Kind;
 
@@ -78,6 +87,16 @@ private:
   /// LayoutOrder - The layout order of this fragment.
   unsigned LayoutOrder;
 
+  // @LOCALMOD-BEGIN
+  BundleAlignType BundleAlign : 2;
+  bool BundleGroupStart : 1;
+  bool BundleGroupEnd : 1;
+
+  /// BundlePadding - The computed padding for this fragment. This is ~0
+  /// until initialized.
+  uint8_t BundlePadding;
+  // @LOCALMOD-END
+
   /// @}
 
 protected:
@@ -99,14 +118,46 @@ public:
   unsigned getLayoutOrder() const { return LayoutOrder; }
   void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
 
+  // @LOCALMOD-BEGIN
+  bool isBundleGroupStart() const { return BundleGroupStart; }
+  void setBundleGroupStart(bool Value) { BundleGroupStart = Value; }
+
+  bool isBundleGroupEnd() const { return BundleGroupEnd; }
+  void setBundleGroupEnd(bool Value) { BundleGroupEnd = Value; }
+
+  BundleAlignType getBundleAlign() const { return BundleAlign; }
+  void setBundleAlign(BundleAlignType Value) { BundleAlign = Value; }
+  // @LOCALMOD-END
+
   static bool classof(const MCFragment *O) { return true; }
 
   void dump();
 };
 
+// @LOCALMOD-BEGIN
+// This is just a tiny data fragment with no fixups.
+// (To help with memory usage)
+class MCTinyFragment : public MCFragment {
+ private:
+  SmallString<6> Contents;
+
+ public:
+
+  MCTinyFragment(MCSectionData *SD = 0) : MCFragment(FT_Tiny, SD) {}
+
+  SmallString<6> &getContents() { return Contents; }
+  const SmallString<6> &getContents() const { return Contents; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Tiny;
+  }
+  static bool classof(const MCTinyFragment *) { return true; }
+};
+// @LOCALMOD-END
+
 class MCDataFragment : public MCFragment {
   virtual void anchor();
 
-  SmallString<32> Contents;
+  SmallString<6> Contents;  // @LOCALMOD: Memory efficiency
 
   /// Fixups - The list of fixups in this fragment.
   std::vector<MCFixup> Fixups;
@@ -121,8 +172,8 @@ public:
   /// @name Accessors
   /// @{
 
-  SmallString<32> &getContents() { return Contents; }
-  const SmallString<32> &getContents() const { return Contents; }
+  SmallString<6> &getContents() { return Contents; }              // @LOCALMOD
+  const SmallString<6> &getContents() const { return Contents; }  // @LOCALMOD
 
   /// @}
   /// @name Fixup Access
@@ -474,6 +525,21 @@ private:
   /// it.
   unsigned HasInstructions : 1;
 
+  // @LOCALMOD-BEGIN
+  bool BundlingEnabled;
+  bool BundleLocked;
+
+  // Because ".bundle_lock" occurs before the fragment it applies to exists,
+  // we need to keep this flag around so we know to mark the next fragment
+  // as the start of a bundle group. A similar flag is not necessary for the
+  // last fragment, because when a .bundle_unlock occurs, the last fragment
+  // in the group already exists and can be marked directly.
+  bool BundleGroupFirstFrag;
+
+  typedef MCFragment::BundleAlignType BundleAlignType;
+  BundleAlignType BundleAlignNext;
+  // @LOCALMOD-END
+
   /// @}
 
 public:
@@ -495,6 +561,20 @@ public:
   unsigned getLayoutOrder() const { return LayoutOrder; }
   void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
 
+  // @LOCALMOD-BEGIN
+  bool isBundlingEnabled() const { return BundlingEnabled; }
+
+  bool isBundleLocked() const { return BundleLocked; }
+  void setBundleLocked(bool Value) { BundleLocked = Value; }
+
+  bool isBundleGroupFirstFrag() const { return BundleGroupFirstFrag; }
+  void setBundleGroupFirstFrag(bool Value) { BundleGroupFirstFrag = Value; }
+
+
+  BundleAlignType getBundleAlignNext() const { return BundleAlignNext; }
+  void setBundleAlignNext(BundleAlignType Value) { BundleAlignNext = Value; }
+  // @LOCALMOD-END
+
   /// @name Fragment Access
   /// @{
@@ -753,6 +833,13 @@ private:
   bool fragmentNeedsRelaxation(const MCInstFragment *IF,
                                const MCAsmLayout &Layout) const;
 
+  // @LOCALMOD-BEGIN
+  uint8_t ComputeBundlePadding(const MCAsmLayout &Layout,
+                               MCFragment *F,
+                               uint64_t FragmentOffset) const;
+  // @LOCALMOD-END
+
+
   /// layoutOnce - Perform one layout iteration and return true if any offsets
   /// were adjusted.
   bool layoutOnce(MCAsmLayout &Layout);
@@ -819,6 +906,12 @@ public:
   MCAsmBackend &getBackend() const { return Backend; }
 
+  // @LOCALMOD-BEGIN
+  uint64_t getBundleSize() const;
+  uint64_t getBundleMask() const;
+  // @LOCALMOD-END
+
+
   MCCodeEmitter &getEmitter() const { return Emitter; }
 
   MCObjectWriter &getWriter() const { return Writer; }
diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h
index f153cb0c1a..5718feca88 100644
--- a/include/llvm/MC/MCELFObjectWriter.h
+++ b/include/llvm/MC/MCELFObjectWriter.h
@@ -67,6 +67,12 @@ public:
       return ELF::ELFOSABI_FREEBSD;
     case Triple::Linux:
       return ELF::ELFOSABI_LINUX;
+    // @LOCALMOD-BEGIN
+    // This shouldn't be needed anymore (sel_ldr doesn't check for it),
+    // but removing it may require some changes in binutils also.
+    case Triple::NativeClient:
+      return ELF::ELFOSABI_NACL;
+    // @LOCALMOD-END
     default:
       return ELF::ELFOSABI_NONE;
   }
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index a69075ddd0..aef6303bf7 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -68,6 +68,14 @@ public:
                              unsigned AddrSpace);
   virtual void EmitULEB128Value(const MCExpr *Value);
   virtual void EmitSLEB128Value(const MCExpr *Value);
+
+  // @LOCALMOD-BEGIN
+  void EmitBundleLock();
+  void EmitBundleUnlock();
+  void EmitBundleAlignStart();
+  void EmitBundleAlignEnd();
+  // @LOCALMOD-END
+
   virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
   virtual void ChangeSection(const MCSection *Section);
   virtual void EmitInstruction(const MCInst &Inst);
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 44c5fefaa1..99736c92a5 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -467,6 +467,27 @@ namespace llvm {
 
     /// @}
 
+    // @LOCALMOD-BEGIN
+    /// @name Bundling Directives
+    /// @{
+
+    /// EmitBundleLock - Begin a group of instructions which cannot
+    /// cross a bundle boundary.
+    virtual void EmitBundleLock() = 0;
+
+    /// EmitBundleUnlock - End a bundle-locked group of instructions.
+    virtual void EmitBundleUnlock() = 0;
+
+    /// EmitBundleAlignStart - Guarantee that the next instruction or
+    /// bundle-locked group starts at the beginning of a bundle.
+    virtual void EmitBundleAlignStart() = 0;
+
+    /// EmitBundleAlignEnd - Guarantee that the next instruction or
+    /// bundle-locked group finishes at the end of a bundle.
+    virtual void EmitBundleAlignEnd() = 0;
+    /// @}
+    // @LOCALMOD-END
+
     /// EmitFileDirective - Switch to a new logical file.  This is used to
     /// implement the '.file "foo.c"' assembler directive.
     virtual void EmitFileDirective(StringRef Filename) = 0;
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index cb7c1dc36a..29e278fad5 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -186,6 +186,22 @@ public:
       : Behavior(B), Key(K), Val(V) {}
   };
 
+  /// @LOCALMOD-BEGIN
+  /// An enumeration for describing the module format
+  enum OutputFormat {
+    ObjectOutputFormat,
+    SharedOutputFormat,
+    ExecutableOutputFormat
+  };
+
+  /// A structure describing the symbols needed from an external file.
+  struct NeededRecord {
+    std::string DynFile;               // Source file (soname)
+    std::vector<std::string> Symbols;  // List of symbol names
+                                       // (with version suffix)
+  };
+  /// @LOCALMOD-END
+
 /// @}
 /// @name Member Variables
 /// @{
@@ -203,6 +219,9 @@ private:
   std::string ModuleID;           ///< Human readable identifier for the module
   std::string TargetTriple;       ///< Platform target triple Module compiled on
   std::string DataLayout;         ///< Target data description
+  // @LOCALMOD-BEGIN
+  mutable std::string ModuleSOName;  ///< Module SOName (for shared format)
+  // @LOCALMOD-END
 
   void *NamedMDSymTab;            ///< NamedMDNode names.
 
   friend class Constant;
@@ -234,6 +253,24 @@ public:
   /// @returns a string containing the target triple.
   const std::string &getTargetTriple() const { return TargetTriple; }
 
+  // @LOCALMOD-BEGIN
+
+  /// Get the module format
+  /// @returns the module format
+  OutputFormat getOutputFormat() const;
+
+  /// Get the SOName of this module.
+  /// @returns a string containing the module soname
+  const std::string &getSOName() const;
+
+  /// Record the needed information for a global value.
+  /// This creates a needed record for DynFile, if one does not already exist.
+  void addNeededRecord(StringRef DynFile, GlobalValue *GV);
+
+  // Fill NeededOut with all needed records present in the module.
+  void getNeededRecords(std::vector<NeededRecord> *NeededOut) const;
+  // @LOCALMOD-END
+
   /// Get the target endian information.
   /// @returns Endianess - an enumeration for the endianess of the target
   Endianness getEndianness() const;
@@ -263,6 +300,18 @@ public:
   /// Set the target triple.
   void setTargetTriple(StringRef T) { TargetTriple = T; }
 
+  /// @LOCALMOD-BEGIN
+
+  /// Set the module format
+  void setOutputFormat(OutputFormat F);
+
+  /// For modules with output format "shared", set the output soname.
+  void setSOName(StringRef Name);
+
+  /// Wrap a global symbol.
+  void wrapSymbol(StringRef SymName);
+  /// @LOCALMOD-END
+
   /// Set the module-scope inline assembly blocks.
   void setModuleInlineAsm(StringRef Asm) {
     GlobalScopeAsm = Asm;
@@ -589,6 +638,11 @@ public:
   /// Dump the module to stderr (for debugging).
   void dump() const;
 
+  /// @LOCALMOD-BEGIN
+  /// Print the PNaCl metadata for the module.
+  void dumpMeta(raw_ostream &OS) const;
+  /// @LOCALMOD-END
+
   /// This function causes all the subinstructions to "let go" of all references
   /// that they are maintaining.  This allows one to 'delete' a whole class at
   /// a time, even though there may be circular references... first all
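A sketch of a consumer of the new Module-level metadata API above (illustrative only; describeModule is a hypothetical helper, not part of this patch):

    // Print the PNaCl-specific module metadata using only the accessors
    // declared in the Module.h LOCALMODs.
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void describeModule(Module &M) {
      if (M.getOutputFormat() == Module::SharedOutputFormat)
        errs() << "soname: " << M.getSOName() << "\n";

      // List each soname we depend on and the versioned symbols it provides.
      std::vector<Module::NeededRecord> Needed;
      M.getNeededRecords(&Needed);
      for (unsigned i = 0, e = Needed.size(); i != e; ++i) {
        errs() << "needed from " << Needed[i].DynFile << ":";
        for (unsigned j = 0, je = Needed[i].Symbols.size(); j != je; ++j)
          errs() << " " << Needed[i].Symbols[j];
        errs() << "\n";
      }
    }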
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index f2210dc0f2..9373958648 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -325,6 +325,7 @@ enum {
   ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000
   ELFOSABI_C6000_LINUX = 65,  // Linux TMS320C6000
   ELFOSABI_ARM = 97,          // ARM
+  ELFOSABI_NACL = 123,        // Native Client // @LOCALMOD
   ELFOSABI_STANDALONE = 255   // Standalone (embedded) application
 };
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index 6787633c1d..966af39a15 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -104,6 +104,11 @@ protected:
   void setValPtrInt(unsigned K) { VP.setInt(K); }
   unsigned getValPtrInt() const { return VP.getInt(); }
 
+  // @LOCALMOD-BEGIN -- Hack for bug:
+  // http://code.google.com/p/nativeclient/issues/detail?id=2786
+  void setKind(HandleBaseKind K) { PrevPair.setInt(K); }
+  // @LOCALMOD-END
+
   static bool isValid(Value *V) {
     return V &&
            V != DenseMapInfo<Value *>::getEmptyKey() &&
@@ -231,6 +236,15 @@ public:
     return getValPtr();
   }
 
+  // @LOCALMOD-BEGIN -- Hack for bug:
+  // http://code.google.com/p/nativeclient/issues/detail?id=2786
+  // This allows us to weaken the Asserting Value Handle in LexicalScopes.h,
+  // for Debug info only.
+  void make_weak() {
+    setKind(Weak);
+  }
+  // @LOCALMOD-END
+
   ValueTy *operator->() const { return getValPtr(); }
   ValueTy &operator*() const { return *getValPtr(); }
 };
diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h
index af812069b9..959fb3fff1 100644
--- a/include/llvm/Support/system_error.h
+++ b/include/llvm/Support/system_error.h
@@ -595,7 +595,7 @@ enum _ {
 #else
   stream_timeout = ETIMEDOUT,
 #endif
-  text_file_busy = ETXTBSY,
+  text_file_busy = EINVAL,  // @LOCALMOD
   timed_out = ETIMEDOUT,
   too_many_files_open_in_system = ENFILE,
   too_many_files_open = EMFILE,
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index e053ce8c6b..621a9c83ba 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -717,6 +717,40 @@ def BUNDLE : Instruction {
   let InOperandList = (ins variable_ops);
   let AsmString = "BUNDLE";
 }
+// @LOCALMOD-BEGIN
+def BUNDLE_ALIGN_START : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  let AsmString = "";
+  let neverHasSideEffects = 1;
+  let isAsCheapAsAMove = 1;
+  let isNotDuplicable = 1;
+}
+def BUNDLE_ALIGN_END : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  let AsmString = "";
+  let neverHasSideEffects = 1;
+  let isAsCheapAsAMove = 1;
+  let isNotDuplicable = 1;
+}
+def BUNDLE_LOCK : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  let AsmString = "";
+  let neverHasSideEffects = 1;
+  let isAsCheapAsAMove = 1;
+  let isNotDuplicable = 1;
+}
+def BUNDLE_UNLOCK : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins);
+  let AsmString = "";
+  let neverHasSideEffects = 1;
+  let isAsCheapAsAMove = 1;
+  let isNotDuplicable = 1;
+}
+// @LOCALMOD-END
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index d56db7b511..7df3bfa473 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -48,11 +48,19 @@ private:
   unsigned StackAlignment;
   unsigned TransientStackAlignment;
   int LocalAreaOffset;
+
+  // @LOCALMOD-BEGIN
+  // TODO(pdox): Refactor this and upstream it, to get rid of the
+  // assumption that StackSlotSize == PointerSize.
+  unsigned StackSlotSize;
+  // @LOCALMOD-END
 public:
-  TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
-                      unsigned TransAl = 1)
+  TargetFrameLowering(StackDirection D,
+                      unsigned StackAl, int LAO,
+                      unsigned TransAl = 1,
+                      unsigned SlotSize = 0)  // @LOCALMOD
     : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
-      LocalAreaOffset(LAO) {}
+      LocalAreaOffset(LAO), StackSlotSize(SlotSize) {}
 
   virtual ~TargetFrameLowering();
 
@@ -63,6 +71,11 @@ public:
   ///
   StackDirection getStackGrowthDirection() const { return StackDir; }
 
+  // @LOCALMOD-BEGIN
+  /// getStackSlotSize - Return the size of a stack slot
+  unsigned getStackSlotSize() const { return StackSlotSize; }
+  // @LOCALMOD-END
+
   /// getStackAlignment - This method returns the number of bytes to which the
   /// stack pointer must be aligned on entry to a function.  Typically, this
   /// is the largest alignment for any data object in the target.
diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h
index 044afd9b73..c2bb376131 100644
--- a/include/llvm/Target/TargetJITInfo.h
+++ b/include/llvm/Target/TargetJITInfo.h
@@ -129,6 +129,25 @@ namespace llvm {
     /// separately allocated heap memory rather than in the same
     /// code memory allocated by JITCodeEmitter.
     virtual bool allocateSeparateGVMemory() const { return false; }
+
+    // @LOCALMOD-START
+    // NaCl-specific, target-specific stuff
+    typedef struct { uint8_t *ins; int len; } HaltInstruction;
+    /// Get a sequence of NOPs of length len. Returns a pointer to a buffer
+    /// containing a val
+    virtual const uint8_t *getNopSequence(size_t len) const { return NULL; }
+    /// Get the length and definition of the halt/roadblock instruction
+    virtual const HaltInstruction *getHalt() const { return NULL; }
+    virtual int getBundleSize() const { return 0; }
+    virtual int32_t getJumpMask() const { return 0; }
+
+    /// Relocations cannot happen in-place in NaCl because we can't write to
+    /// code. This function takes a pointer to where the code has been emitted,
+    /// before it is copied to the code region. The subsequent call to
+    /// relocate takes pointers to the target code location, but rewrites the
+    /// code in the relocation buffer rather than at the target
+    virtual void setRelocationBuffer(unsigned char * BufferBegin) {}
+    // @LOCALMOD-END
   protected:
     bool useGOT;
   };
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 915dd9d4e8..896ba39096 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -106,6 +106,18 @@ public:
     ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
   };
 
+  // @LOCALMOD-START
+  // This needs to be kept in sync with
+  // native_client/src/untrusted/nacl/pnaclintrin.h.
+  enum PnaclTargetArchitecture {
+    PnaclTargetArchitectureInvalid = 0,
+    PnaclTargetArchitectureX86_32,
+    PnaclTargetArchitectureX86_64,
+    PnaclTargetArchitectureARM_32,
+    PnaclTargetArchitectureARM_32_Thumb
+  };
+  // @LOCALMOD-END
+
   static ISD::NodeType getExtendForContent(BooleanContent Content) {
     switch (Content) {
     case UndefinedBooleanContent:
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index f0b181e345..4a38524ad1 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -87,7 +87,14 @@ namespace TargetOpcode {
   /// BUNDLE - This instruction represents an instruction bundle.  Instructions
   /// which immediately follow a BUNDLE instruction which are marked with
   /// 'InsideBundle' flag are inside the bundle.
-  BUNDLE
+  BUNDLE,
+
+  // @LOCALMOD-BEGIN
+  BUNDLE_ALIGN_START = 14,
+  BUNDLE_ALIGN_END = 15,
+  BUNDLE_LOCK = 16,
+  BUNDLE_UNLOCK = 17
+  // @LOCALMOD-END
 };
 } // end namespace TargetOpcode
 } // end namespace llvm
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index 12a2757315..92e627cea8 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -30,6 +30,12 @@ namespace llvm {
     };
   }
 
+  // @LOCALMOD-BEGIN
+  /// TLSUseCall - This flag enables the use of a function call to get the
+  /// thread pointer for TLS accesses, instead of using inline code.
+  extern bool TLSUseCall;
+  // @LOCALMOD-END
+
   class TargetOptions {
   public:
     TargetOptions()
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 83756daa7a..f25a6809e4 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1481,6 +1481,14 @@ bool BitcodeReader::ParseModule(bool Resume) {
       std::string S;
       if (ConvertToString(Record, 0, S))
         return Error("Invalid MODULE_CODE_TRIPLE record");
+
+      // @LOCALMOD-BEGIN
+      // This hack is needed in order to get Clang compiled binaries
+      // working with the Gold plugin, until PNaCl backend is introduced
+      // in lib/Target/PNaCl.
+      if (S == "le32-unknown-nacl")
+        S = "armv7-none-linux-gnueabi";
+      // @LOCALMOD-END
       TheModule->setTargetTriple(S);
       break;
     }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index eeacc43c09..01f75a78b7 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -156,6 +156,11 @@ bool AsmPrinter::doInitialization(Module &M) {
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   MMI->AnalyzeModule(M);
 
+  // @LOCALMOD-BEGIN
+  IsPlainObject =
+      (MMI->getModule()->getOutputFormat() == Module::ObjectOutputFormat);
+  // @LOCALMOD-END
+
   // Initialize TargetLoweringObjectFile.
   const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
     .Initialize(OutContext, TM);
@@ -272,6 +277,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
   MCSymbol *GVSym = Mang->getSymbol(GV);
   EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
 
+  // @LOCALMOD-BEGIN
+  // For .pexe and .pso files, emit ELF type STT_OBJECT or STT_TLS instead
+  // of NOTYPE for undefined symbols.
+  // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2527
+  if (!GV->hasInitializer() && !IsPlainObject) {
+    OutStreamer.EmitSymbolAttribute(GVSym,
+                                    GV->isThreadLocal() ? MCSA_ELF_TypeTLS
+                                                        : MCSA_ELF_TypeObject);
+  }
+  // @LOCALMOD-END
+
   if (!GV->hasInitializer())   // External globals require no extra code.
     return;
@@ -682,9 +698,14 @@ void AsmPrinter::EmitFunctionBody() {
         break;
 
       case TargetOpcode::EH_LABEL:
-      case TargetOpcode::GC_LABEL:
+      case TargetOpcode::GC_LABEL: {
+        // @LOCALMOD-START
+        unsigned LabelAlign = GetTargetLabelAlign(II);
+        if (LabelAlign) EmitAlignment(LabelAlign);
+        // @LOCALMOD-END
         OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
         break;
+      }
 
       case TargetOpcode::INLINEASM:
         EmitInlineAsm(II);
         break;
@@ -700,6 +721,20 @@ void AsmPrinter::EmitFunctionBody() {
       case TargetOpcode::KILL:
         if (isVerbose()) EmitKill(II, *this);
         break;
+      // @LOCALMOD-BEGIN
+      case TargetOpcode::BUNDLE_ALIGN_START:
+        OutStreamer.EmitBundleAlignStart();
+        break;
+      case TargetOpcode::BUNDLE_ALIGN_END:
+        OutStreamer.EmitBundleAlignEnd();
+        break;
+      case TargetOpcode::BUNDLE_LOCK:
+        OutStreamer.EmitBundleLock();
+        break;
+      case TargetOpcode::BUNDLE_UNLOCK:
+        OutStreamer.EmitBundleUnlock();
+        break;
+      // @LOCALMOD-END
       default:
         if (!TM.hasMCUseLoc())
           MCLineEntry::Make(&OutStreamer, getCurrentSection());
@@ -849,6 +884,16 @@ bool AsmPrinter::doFinalization(Module &M) {
     const Function &F = *I;
     if (!F.isDeclaration())
       continue;
+
+    // @LOCALMOD-BEGIN
+    // For .pexe and .pso files, emit STT_FUNC for function declarations.
+    // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2527
+    if (!IsPlainObject) {
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(&F),
+                                      MCSA_ELF_TypeFunction);
+    }
+    // @LOCALMOD-END
+
     GlobalValue::VisibilityTypes V = F.getVisibility();
     if (V == GlobalValue::DefaultVisibility)
       continue;
@@ -1066,12 +1111,25 @@ void AsmPrinter::EmitJumpTableInfo() {
   if (// In PIC mode, we need to emit the jump table to the same section as the
       // function body itself, otherwise the label differences won't make sense.
      // FIXME: Need a better predicate for this: what about custom entries?
-      MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
       // We should also do if the section name is NULL or function is declared
       // in discardable section
      // FIXME: this isn't the right predicate, should be based on the MCSection
      // for the function.
-      F->isWeakForLinker()) {
+      // @LOCALMOD-START
+      // The original code is a hack.
+      // Jump tables usually end up in .rodata, but for functions with weak
+      // linkage there is a chance that they are not needed, so in order to
+      // discard the function AND the jump table, both are kept in .text.
+      // This fix only works if we never discard weak functions, which is
+      // guaranteed because the bitcode linker already throws out unused ones.
+      // TODO: Investigate the other case of concern -- PIC code.
+      // Concern is about jumptables being in a different section: can the
+      // rodata and text be too far apart for a RIP-relative offset?
+      F->isWeakForLinker())
+      && !UseReadOnlyJumpTables()) {
+      // @LOCALMOD-END
    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
   } else {
     // Otherwise, drop it in the readonly section.
@@ -1093,7 +1151,7 @@ void AsmPrinter::EmitJumpTableInfo() {
   // .set directive for each unique entry.  This reduces the number of
   // relocations the assembler will generate for the jump table.
   if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
-      MAI->hasSetDirective()) {
+      MAI->hasSetDirective() && !UseReadOnlyJumpTables()) {  // @LOCALMOD
     SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
     const TargetLowering *TLI = TM.getTargetLowering();
     const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
@@ -1174,7 +1232,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
     // If we have emitted set directives for the jump table entries, print
     // them rather than the entries themselves.  If we're emitting PIC, then
     // emit the table entries as differences between two text section labels.
-    if (MAI->hasSetDirective()) {
+    if (MAI->hasSetDirective() && !UseReadOnlyJumpTables()) {  // @LOCALMOD
       // If we used .set, reference the .set's symbol.
       Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
                                       OutContext);
@@ -1194,7 +1252,6 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
   OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
 }
 
-
 /// EmitSpecialLLVMGlobal - Check to see if the specified global is a
 /// special global used by LLVM.  If so, emit it and return true, otherwise
 /// do nothing and return false.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 2e24977ef2..734e7b9195 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -520,7 +520,8 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) {
 /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
 /// maps as well.
 unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
-                                         StringRef DirName) {
+                                         StringRef DirName,
+                                         StringRef Extra) {  // @LOCALMOD
   // If FE did not provide a file name, then assume stdin.
   if (FileName.empty())
     return GetOrCreateSourceID("<stdin>", StringRef());
@@ -536,6 +537,9 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
   NamePair += DirName;
   NamePair += '\0'; // Zero bytes are not allowed in paths.
   NamePair += FileName;
+  // @LOCALMOD
+  NamePair += '\0'; // Zero bytes are not allowed in paths.
+  NamePair += Extra;
 
   StringMapEntry<unsigned> &Ent = SourceIdMap.GetOrCreateValue(NamePair, SrcId);
   if (Ent.getValue() != SrcId)
@@ -547,13 +551,37 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName,
   return SrcId;
 }
 
+// @LOCALMOD-BEGIN
+// A special version of GetOrCreateSourceID for CompileUnits.
+// It is possible that with bitcode linking, we end up with distinct
+// compile units based on the same source file.
+// E.g., compile foo.c with -DMACRO1 to foo1.bc, then compile
+// foo.c again with -DMACRO2 to foo2.bc and link.
+// We use additional information to form a unique ID in that case.
+unsigned DwarfDebug::GetOrCreateCompileUnitID(StringRef Filename,
+                                              StringRef Dirname,
+                                              const MDNode *N) {
+  std::string DIUnitStr;
+  raw_string_ostream ostr(DIUnitStr);
+
+  // Using information from the compile unit (N)'s getEnumTypes(),
+  // getRetainedTypes(), getSubprograms(), getGlobalVariables()
+  // could be pretty expensive.
+  // Cheat and use the MDNode's address as an additional identifying factor.
+  // constructCompileUnit() is only called once per compile unit.
+  ostr << static_cast<const void*>(N);
+  return GetOrCreateSourceID(Filename, Dirname, ostr.str());
+}
+// @LOCALMOD-END
+
 /// constructCompileUnit - Create new CompileUnit for the given
 /// metadata node with tag DW_TAG_compile_unit.
 CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) {
   DICompileUnit DIUnit(N);
   StringRef FN = DIUnit.getFilename();
   CompilationDir = DIUnit.getDirectory();
-  unsigned ID = GetOrCreateSourceID(FN, CompilationDir);
+  // @LOCALMOD
+  unsigned ID = GetOrCreateCompileUnitID(FN, CompilationDir, N);
 
   DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
   CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, Asm, this);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index a4580734a8..d153c0dd0c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -515,7 +515,16 @@ public:
   /// GetOrCreateSourceID - Look up the source id with the given directory and
   /// source file names. If none currently exists, create a new id and insert it
   /// in the SourceIds map.
-  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName);
+  unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName,
+                               StringRef Extra = "");  // @LOCALMOD for Extra
+
+  // @LOCALMOD-BEGIN - Create an ID for CompileUnits, taking extra care
+  // in the case that we have multiple compile units coming from the
+  // same source file and directory.
+  unsigned GetOrCreateCompileUnitID(StringRef FileName, StringRef DirName,
+                                    const MDNode *N);
+  // @LOCALMOD-END
 
   /// createSubprogramDIE - Create new DIE using SP.
   DIE *createSubprogramDIE(DISubprogram SP);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index fb65bb7f3f..b9d2cfd4ed 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -20,6 +20,7 @@
 #include "BranchFolding.h"
 #include "llvm/Function.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"  // @LOCALMOD
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -234,6 +235,21 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
       }
     }
 
+    // @LOCALMOD-START
+    // This is currently only used on ARM targets, where the ConstantPool
+    // subclass is overloading getJumpTableIndex().
+    const std::vector<MachineConstantPoolEntry>& CPs =
+        MF.getConstantPool()->getConstants();
+    for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+      if (!CPs[i].isMachineConstantPoolEntry()) continue;
+      unsigned *JTIndex = CPs[i].Val.MachineCPVal->getJumpTableIndex();
+      if (!JTIndex) continue;
+      // Remember that this JT is live.
+      JTIsLive.set(*JTIndex);
+    }
+    // @LOCALMOD-END
+
+
     // Finally, remove dead jump tables.  This happens when the
     // indirect jump was unreachable (and thus deleted).
     for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index a9ca42f69b..8ccab9cd5d 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -92,6 +92,46 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
 #  define setjmp_undefined_for_msvc
 #endif
 
+// @LOCALMOD-BEGIN
+// Calls to these functions may materialize as part of a conversion
+// from an intrinsic, e.g. llvm.memset -> memset.
+// So if these functions are available in bitcode form we need to:
+// * make sure they do not get discarded -- if there is a chance that
+//   a caller might materialize
+// * make sure they do not get specialized for a given callsite
+// Both problems are avoided by pretending there are unknown callers.
+// The function IntrinsicLowering::AddPrototypes() below does just that.
+// TODO(robertm): elaborate some more
+static const char *IntrinsicNames[] = {
+  "abort",
+  "memcpy", "memset", "memmove",
+  "sqrtf", "sqrt", "sqrtl",
+  "sinf", "sin", "sinl",
+  "cosf", "cos", "cosl",
+  "powf", "pow", "powl",
+  "logf", "log", "logl",
+  "log2f", "log2", "log2l",
+  "log10f", "log10", "log10l",
+  "expf", "exp", "expl",
+  "exp2f", "exp2", "exp2l",
+  NULL
+};
+
+StringSet<> IntrinsicLowering::FuncNames;
+
+const StringSet<> &IntrinsicLowering::GetFuncNames() {
+  if (FuncNames.empty()) {
+    for (unsigned i = 0; IntrinsicNames[i]; ++i)
+      FuncNames.insert(IntrinsicNames[i]);
+  }
+  return FuncNames;
+}
+
+bool IntrinsicLowering::IsCalledByIntrinsic(const StringRef &FuncName) {
+  return IntrinsicLowering::GetFuncNames().count(FuncName) > 0;
+}
+// @LOCALMOD-END
+
 void IntrinsicLowering::AddPrototypes(Module &M) {
   LLVMContext &Context = M.getContext();
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index cf885251d6..20b302926b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -162,7 +162,8 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
 MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
   iterator B = begin(), E = end(), I = E;
-  while (I != B && ((--I)->isTerminator() || I->isDebugValue()))
+  while (I != B && ((--I)->isTerminator() || I->isDebugValue()
+         || I->getOpcode() == TargetOpcode::BUNDLE_UNLOCK))  // @LOCALMOD
     ; /*noop */
   while (I != E && !I->isTerminator())
     ++I;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3b8489f03b..1aee7c572d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8262,6 +8262,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
       if (TLI.isTypeLegal(N2.getValueType()) &&
+          // @LOCALMOD-START
+          // When we combine two 8-byte constants into a 16-byte one,
+          // we get constant pool entries which are too big.
+          TLI.getTargetData()->getTypeAllocSize(FV->getConstantFPValue()->getType()) <= 4 &&
+          // @LOCALMOD-STOP
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
            TargetLowering::Legal) &&
          // If both constants have multiple uses, then we won't need to do an
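The intent of IsCalledByIntrinsic above can be shown with a hypothetical dead-stripping check; this is a sketch of a plausible consumer, not code from this patch:

    // Keep any bitcode definition that an intrinsic expansion might call:
    // memset, sqrt, etc. may gain callers only after llvm.* intrinsics are
    // lowered, so they must be treated as having unknown callers.
    #include "llvm/CodeGen/IntrinsicLowering.h"
    #include "llvm/Function.h"

    using namespace llvm;

    static bool mayDiscard(const Function &F) {
      if (IntrinsicLowering::IsCalledByIntrinsic(F.getName()))
        return false;  // an intrinsic lowering could still materialize a call
      return F.use_empty();
    }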
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4152aa1ae1..88a6baf02f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5176,6 +5176,38 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::lifetime_end:
     // Discard region information.
     return 0;
+
+  // @LOCALMOD-BEGIN
+  // Native Client intrinsics for TLS setup / layout.
+  case Intrinsic::nacl_thread_stack_padding: {
+    EVT DestVT = TLI.getValueType(I.getType());
+    setValue(&I, DAG.getNode(ISD::NACL_THREAD_STACK_PADDING, dl, DestVT));
+    return 0;
+  }
+  case Intrinsic::nacl_tp_alignment: {
+    EVT DestVT = TLI.getValueType(I.getType());
+    setValue(&I, DAG.getNode(ISD::NACL_TP_ALIGN, dl, DestVT));
+    return 0;
+  }
+  case Intrinsic::nacl_tp_tls_offset: {
+    SDValue tls_size = getValue(I.getArgOperand(0));
+    setValue(&I, DAG.getNode(ISD::NACL_TP_TLS_OFFSET, dl,
+                             tls_size.getValueType(),
+                             tls_size));
+    return 0;
+  }
+  case Intrinsic::nacl_tp_tdb_offset: {
+    SDValue tdb_size = getValue(I.getArgOperand(0));
+    setValue(&I, DAG.getNode(ISD::NACL_TP_TDB_OFFSET, dl,
+                             tdb_size.getValueType(),
+                             tdb_size));
+    return 0;
+  }
+  case Intrinsic::nacl_target_arch: {
+    EVT DestVT = TLI.getValueType(I.getType());
+    setValue(&I, DAG.getNode(ISD::NACL_TARGET_ARCH, dl, DestVT));
+    return 0;
+  }
+  // @LOCALMOD-END
   }
 }
@@ -6358,7 +6390,10 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
                            getRoot(), getValue(I.getOperand(0)),
                            DAG.getSrcValue(I.getOperand(0)),
-                           TD.getABITypeAlignment(I.getType()));
+// @LOCALMOD-BEGIN
+                           TD.getCallFrameTypeAlignment(I.getType()));
+// @LOCALMOD-END
+
   setValue(&I, V);
   DAG.setRoot(V.getValue(1));
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 14e9ec33eb..69ba83ed27 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -310,6 +310,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
     case ISD::SETFALSE:                 return "setfalse";
     case ISD::SETFALSE2:                return "setfalse2";
     }
+
+  // @LOCALMOD-BEGIN
+  // NaCl intrinsics for TLS setup
+  case ISD::NACL_THREAD_STACK_PADDING: return "nacl_thread_stack_padding";
+  case ISD::NACL_TP_ALIGN:             return "nacl_tp_alignment";
+  case ISD::NACL_TP_TLS_OFFSET:        return "nacl_tls_offset";
+  case ISD::NACL_TP_TDB_OFFSET:        return "nacl_tdb_offset";
+  case ISD::NACL_TARGET_ARCH:          return "nacl_target_arch";
+  // @LOCALMOD-END
   }
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 4add002890..e3d054d4ce 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -559,7 +559,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   }
   DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
         << " '" << BlockName << "'\n"; CurDAG->dump());
-
   if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
 
   // Run the DAG combiner in pre-legalize mode.
@@ -588,7 +587,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
   if (Changed) {
     if (ViewDAGCombineLT)
       CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
-
     // Run the DAG combiner in post-type-legalize mode.
     {
       NamedRegionTimer T("DAG Combining after legalize types", GroupName,
@@ -610,10 +608,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
       NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
       CurDAG->LegalizeTypes();
     }
-
     if (ViewDAGCombineLT)
       CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
-
     // Run the DAG combiner in post-type-legalize mode.
{ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, @@ -624,19 +620,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); - // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 7a89e3ca63..bdc27485a7 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -55,8 +55,16 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, case dwarf::DW_EH_PE_absptr: return Mang->getSymbol(GV); case dwarf::DW_EH_PE_pcrel: { + // @LOCALMOD-BEGIN + // The dwarf section label should not include the version suffix. + // Strip it off here. + StringRef Name = Mang->getSymbol(GV)->getName(); + size_t atpos = Name.find("@"); + if (atpos != StringRef::npos) + Name = Name.substr(0, atpos); + // @LOCALMOD-END return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - Mang->getSymbol(GV)->getName()); + Name); // @LOCALMOD } } } @@ -65,7 +73,15 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); - NameData += Sym->getName(); + // @LOCALMOD-BEGIN + // The dwarf section label should not include the version suffix. + // Strip it off here. + StringRef Name = Sym->getName(); + size_t atpos = Name.find("@"); + if (atpos != StringRef::npos) + Name = Name.substr(0, atpos); + // @LOCALMOD-END + NameData += Name; // @LOCALMOD MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 7a206ebf73..d99b666345 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -322,7 +322,9 @@ GenericValue lle_X_abort(FunctionType *FT, const std::vector<GenericValue> &Args) { //FIXME: should we report or raise here? 
  //report_fatal_error("Interpreted program raised SIGABRT");
-  raise (SIGABRT);
+  // TODO(dschuff): fix this, or figure out how to get raise()
+  abort(); // @LOCALMOD
+  //raise (SIGABRT);
  return GenericValue();
}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 2ae155bebf..338db8f454 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -210,6 +210,8 @@ public:
 private:
   static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
                                        TargetMachine &tm);
+  // Native Client needs its own memory manager, so custom ones are unsupported
+  static JITCodeEmitter *createNaClEmitter(JIT &J, TargetMachine &tm);
   void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
   void updateFunctionStub(Function *F);
   void jitTheFunction(Function *F, const MutexGuard &locked);
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 504c8bdffd..acbb20b1b2 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ExecutionEngine/NaClJITMemoryManager.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetJITInfo.h"
@@ -52,12 +53,15 @@
 #ifndef NDEBUG
 #include <iomanip>
 #endif
+#ifdef __native_client__
+#include <nacl/nacl_dyncode.h>
+#endif
 using namespace llvm;
 
 STATISTIC(NumBytes, "Number of bytes of machine code compiled");
 STATISTIC(NumRelos, "Number of relocations applied");
 STATISTIC(NumRetries, "Number of retries with more memory");
-
+STATISTIC(NumNopBytes, "Number of bytes of NOPs emitted");
 
 // A declaration may stop being a declaration once it's fully read from bitcode.
 // This function returns true if F is fully read and is still a declaration.
@@ -276,8 +280,6 @@ namespace {
   /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
   /// used to output functions to memory for execution.
   class JITEmitter : public JITCodeEmitter {
-    JITMemoryManager *MemMgr;
-
    // When outputting a function stub in the context of some other function, we
    // save BufferBegin/BufferEnd/CurBufferPtr here.
    uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
@@ -287,11 +289,13 @@ namespace {
    // ask the memory manager for at least this much space. When we
    // successfully emit the function, we reset this back to zero.
    uintptr_t SizeEstimate;
-
+protected: // TODO(dschuff): fix/move this once we do validation and are sure
+           // which functions/data we need in NaClJITEmitter; also add LOCALMOD
+    JITMemoryManager *MemMgr;
    /// Relocations - These are the relocations that the function needs, as
    /// emitted.
    std::vector<MachineRelocation> Relocations;
-
+private:
    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
    /// It is filled in by the StartMachineBasicBlock callback and queried by
    /// the getMachineBasicBlockAddress callback.
@@ -375,7 +379,7 @@ namespace {
        DE.reset(new JITDwarfEmitter(jit));
      }
    }
-    ~JITEmitter() {
+    virtual ~JITEmitter() { // @LOCALMOD
      delete MemMgr;
    }
 
@@ -393,10 +397,10 @@ namespace {
    void initJumpTableInfo(MachineJumpTableInfo *MJTI);
    void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
 
-    void startGVStub(const GlobalValue* GV,
+    virtual void startGVStub(const GlobalValue* GV,
                     unsigned StubSize, unsigned Alignment = 1);
-    void startGVStub(void *Buffer, unsigned StubSize);
-    void finishGVStub();
+    virtual void startGVStub(void *Buffer, unsigned StubSize);
+    virtual void finishGVStub();
    virtual void *allocIndirectGV(const GlobalValue *GV,
                                  const uint8_t *Buffer, size_t Size,
                                  unsigned Alignment);
@@ -468,6 +472,360 @@ namespace {
                               bool MayNeedFarStub);
    void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
  };
+
+  // @LOCALMOD-START
+  class NaClJITEmitter : public JITEmitter {
+    /* There are two NaCl-specific requirements that must be dealt with: the
+     * first is that the data and code spaces are strictly separated, and code
+     * must be copied (by the service runtime/validator) to its destination
+     * after emission and relocation have finished.
+     * The second is bundle alignment: neither instructions nor multi-
+     * instruction pseudoinstruction groups may cross bundle boundaries.
+     *
+     * Requirement 1 is dealt with jointly by NaClJITMemoryManager
+     * and NaClJITEmitter. NaClJITMemoryManager separates metadata from
+     * code and returns pointers in the proper space
+     * for code (startFunctionBody, allocateStub) and data (allocateSpace,
+     * startExceptionTable, etc). NaClJITEmitter emits code into a separate
+     * memory buffer (EmissionBuffer). After startFunction allocates the
+     * function's memory, NaClJITEmitter's startFunction points BufferBegin,
+     * CurBufferPtr and BufferEnd at the EmissionBuffer (this avoids having to
+     * override all of the actual emission methods from JITCodeEmitter).
+     * JITEmitter already uses this trick for emitting a stub in the middle
+     * of emitting a function, so it doesn't seem so terrible to do our own
+     * similar swapping of the pointers.
+     *
+     * Requirement 2 is bundle alignment.
+     * X86CodeEmitter makes several calls into JITCodeEmitter per instruction,
+     * to add the various bytes, constants, etc. To implement bundle alignment,
+     * we add methods to start and end a bundle-locked group
+     * (the group can include just one instruction or several).
+     * The X86CodeEmitter will pass through any such markers created by the
+     * rewriting passes (which surround multiple-instruction groups),
+     * and will also generate them surrounding each individual instruction
+     * (there should never be more than two-deep nesting).
+     * When beginBundleLock is called, the CurBufferPtr is marked. When
+     * endBundleLock is called, it checks that the group does not cross a
+     * bundle boundary; if it does, it inserts nop padding as necessary.
+     * If padding is added, the relocations must also be fixed up; this also
+     * happens in endBundleLock.
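+     * For example, with 32-byte bundles, a 6-byte locked group that would
+     * start at offset 29 is preceded by 3 bytes of nop padding so that it
+     * starts at offset 32 instead, and the offsets of any relocations
+     * recorded inside the group are shifted by the same 3 bytes.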
+     *
+     */
+  public:
+    NaClJITEmitter(JIT &jit, TargetMachine &TM) :
+      JITEmitter(jit, new NaClJITMemoryManager(), TM),
+      BundleLockSavedCurBufferPtr(NULL),
+      BundleNestCount(0),
+      AlignNextGroup(kNone),
+      GroupRelocationCount(0),
+      JITInfo(&jit.getJITInfo()),
+      kBundleSize(jit.getJITInfo().getBundleSize()),
+      kJumpMask(jit.getJITInfo().getJumpMask()) {
+      uintptr_t CodeSlabSize = MemMgr->GetDefaultCodeSlabSize();
+      EmissionBuffer = MemMgr->allocateSpace(CodeSlabSize, kBundleSize);
+      EmissionBufferSize = CodeSlabSize;
+      DEBUG(dbgs() << "EmissionBuffer " << EmissionBuffer << " size "
+                   << EmissionBufferSize << "\n");
+      StubEmissionBuffer = MemMgr->allocateSpace(kBundleSize, kBundleSize);
+      StubEmissionBufferSize = kBundleSize;
+      DEBUG(dbgs() << "StubEmissionBuffer " << StubEmissionBuffer << " size "
+                   << StubEmissionBufferSize << "\n");
+    }
+
+    virtual ~NaClJITEmitter() {
+    }
+
+    static inline bool classof(const JITEmitter*) { return true; }
+
+    virtual void startFunction(MachineFunction &F) {
+      JITEmitter::startFunction(F);
+      // Make sure the emission buffer is at least as big as the allocated
+      // function.
+      if (BufferEnd - BufferBegin > (intptr_t)EmissionBufferSize) {
+        EmissionBufferSize = std::max((uintptr_t)(BufferEnd - BufferBegin),
+                                      2 * EmissionBufferSize);
+        // BumpPtrAllocator doesn't do anything when you call Deallocate; the
+        // memory will be freed on destruction.
+        EmissionBuffer = MemMgr->allocateSpace(EmissionBufferSize,
+                                               kBundleSize);
+        DEBUG(dbgs() << "new EmissionBuffer " << EmissionBuffer << " size "
+                     << EmissionBufferSize << "\n");
+      }
+      // We ensure that the emission buffer is bundle-aligned, and constant
+      // pool emission should not go into code space.
+      assert((CurBufferPtr == BufferBegin ||
+              (int)F.getFunction()->getAlignment() > kBundleSize) &&
+             "Pre-function data should not be emitted into code space");
+      if (CurBufferPtr > BufferBegin) {
+        // If CurBufferPtr has been bumped forward for alignment, we need to
+        // pad the space with nops.
+        memcpy(EmissionBuffer,
+               JITInfo->getNopSequence(CurBufferPtr - BufferBegin),
+               CurBufferPtr - BufferBegin);
+        NumNopBytes += CurBufferPtr - BufferBegin;
+      }
+      FunctionDestination = BufferBegin;
+      setBufferPtrs(EmissionBuffer);
+    }
+
+    virtual bool finishFunction(MachineFunction &F) {
+      uint8_t *end = CurBufferPtr;
+      emitAlignment(kBundleSize);
+      memcpy(end, JITInfo->getNopSequence(CurBufferPtr - end),
+             CurBufferPtr - end);
+      NumNopBytes += CurBufferPtr - end;
+      JITInfo->setRelocationBuffer(BufferBegin);
+      assert(BufferBegin == EmissionBuffer);
+      int FunctionSize = CurBufferPtr - BufferBegin;
+      setBufferPtrs(FunctionDestination);
+      bool result = JITEmitter::finishFunction(F);
+      // If we ran out of memory, don't bother validating; we'll just retry.
+      if (result) return result;
+
+      DEBUG({
+        dbgs() << "Validating " << FunctionDestination << "-" <<
+          FunctionDestination + FunctionSize << "\n";
+        if (sys::hasDisassembler()) {
+          dbgs() << "Disassembled code:\n";
+          dbgs() << sys::disassembleBuffer(EmissionBuffer,
+                                           FunctionSize,
+                                           (uintptr_t)FunctionDestination);
+        } else {
+          dbgs() << "Binary code:\n";
+          uint8_t* q = BufferBegin;
+          for (int i = 0; q < CurBufferPtr; q += 4, ++i) {
+            if (i == 4)
+              i = 0;
+            if (i == 0)
+              dbgs() << "JIT: " << (long)(q - BufferBegin) << ": ";
+            bool Done = false;
+            for (int j = 3; j >= 0; --j) {
+              if (q + j >= CurBufferPtr)
+                Done = true;
+              else
+                dbgs() << (unsigned short)q[j];
+            }
+            if (Done)
+              break;
+            dbgs() << ' ';
+            if (i == 3)
+              dbgs() << '\n';
+          }
+          dbgs() << '\n';
+        }
+      });
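+      // Hand the emitted bytes to the service runtime: nacl_dyncode_create()
+      // validates them and, only if validation succeeds, installs them as
+      // executable code at FunctionDestination. The EmissionBuffer itself
+      // lives in data space and is never executable.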
+#ifdef __native_client__
+      if (nacl_dyncode_create(FunctionDestination, EmissionBuffer,
+                              FunctionSize) != 0) {
+        report_fatal_error("NaCl validation failed");
+      }
+#endif
+      return result;
+    }
+
+    virtual void startGVStub(const GlobalValue* GV,
+                             unsigned StubSize, unsigned Alignment = 1) {
+      JITEmitter::startGVStub(GV, StubSize, Alignment);
+      ReusedStub = false;
+      assert(StubSize <= StubEmissionBufferSize);
+      StubDestination = BufferBegin;
+      setBufferPtrs(StubEmissionBuffer);
+    }
+    virtual void startGVStub(void *Buffer, unsigned StubSize) {
+      JITEmitter::startGVStub(Buffer, StubSize);
+      ReusedStub = true;
+      assert(StubSize <= StubEmissionBufferSize);
+      StubDestination = BufferBegin;
+      setBufferPtrs(StubEmissionBuffer);
+    }
+    virtual void finishGVStub() {
+      assert(CurBufferPtr - BufferBegin == kBundleSize);
+
+      DEBUG(dbgs() << "Validating " << BufferBegin << "-" << StubDestination
+                   << "\n");
+      int ValidationResult = 0;
+#ifdef __native_client__
+      if (!ReusedStub) {
+        ValidationResult = nacl_dyncode_create(StubDestination, BufferBegin,
+                                               CurBufferPtr - BufferBegin);
+      } else {
+        // This is not a thread-safe modification because it updates the whole
+        // stub rather than just a jump target. However, it is only used by
+        // eager compilation to replace a stub which is not in use yet
+        // (it jumps to 0).
+        ValidationResult = nacl_dyncode_modify(StubDestination, BufferBegin,
+                                               CurBufferPtr - BufferBegin);
+      }
+#endif
+      if (ValidationResult) {
+        dbgs() << "NaCl stub validation failed:\n";
+        if (sys::hasDisassembler()) {
+          dbgs() << "Disassembled code:\n";
+          dbgs() << sys::disassembleBuffer(BufferBegin,
+                                           CurBufferPtr - BufferBegin,
+                                           (uintptr_t)StubDestination);
+        }
+        report_fatal_error("Stub validation failed");
+      }
+      setBufferPtrs(StubDestination);
+      JITEmitter::finishGVStub();
+    }
+
+    /// allocateSpace - Allocates *data* space, rather than space in the
+    /// current code block.
+    virtual void *allocateSpace(uintptr_t Size, unsigned Alignment) {
+      return MemMgr->allocateSpace(Size, Alignment);
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      uint8_t *end = CurBufferPtr;
+      emitAlignment(MBB->getAlignment());
+      memcpy(end, JITInfo->getNopSequence(CurBufferPtr - end),
+             CurBufferPtr - end);
+      NumNopBytes += CurBufferPtr - end;
+      JITEmitter::StartMachineBasicBlock(MBB);
+    }
+
+    /// beginBundleLock - Save the current location of CurBufferPtr so we can
+    /// tell if the locked group crosses a bundle boundary.
+    virtual void beginBundleLock() {
+      assert(BundleNestCount <= 2 &&
+             "Bundle-locked groups can't be nested more than two deep");
+      if (++BundleNestCount == 2) return;
+      DEBUG(dbgs() << "begin lock, buffer begin:end:cur " << BufferBegin
+                   << " " << BufferEnd << " " << CurBufferPtr << "\n");
+      BundleLockSavedCurBufferPtr = CurBufferPtr;
+      GroupRelocationCount = 0;
+    }
+
+    /// endBundleLock - Check if the group crosses a bundle boundary. If so
+    /// (or if the group must be aligned to the end of a bundle), move the
+    /// group and add appropriate padding.
+    virtual void endBundleLock() {
+      assert(BundleNestCount > 0 && "mismatched bundle-lock start/end");
+      if (--BundleNestCount > 0) return;
+      DEBUG(dbgs() << "end lock, buffer begin:end:cur:saved " << BufferBegin
+                   << " " << BufferEnd << " " << CurBufferPtr << " "
+                   << BundleLockSavedCurBufferPtr << "\n");
+
+      int GroupLen = CurBufferPtr - BundleLockSavedCurBufferPtr;
+      if (BufferEnd - CurBufferPtr <
+          GroupLen + kBundleSize) {
+        // Added padding can be no more than kBundleSize. Retry if there's any
+        // possibility of overflow.
+        CurBufferPtr = BufferEnd;
+        AlignNextGroup = kNone;
+        return;
+      }
+      // Space left in the current bundle
+      int SpaceLeft = (((intptr_t)BundleLockSavedCurBufferPtr + kBundleSize)
+                       & kJumpMask) - (intptr_t)BundleLockSavedCurBufferPtr;
+      int TotalPadding = 0;
+      if (SpaceLeft < GroupLen || AlignNextGroup == kBegin) {
+        DEBUG(dbgs() << "space " << SpaceLeft << " len " << GroupLen << "\n");
+        memmove(BundleLockSavedCurBufferPtr + SpaceLeft,
+                BundleLockSavedCurBufferPtr, GroupLen);
+        memcpy(BundleLockSavedCurBufferPtr, JITInfo->getNopSequence(SpaceLeft),
+               SpaceLeft);
+        NumNopBytes += SpaceLeft;
+        assert(CurBufferPtr == BundleLockSavedCurBufferPtr + GroupLen);
+        CurBufferPtr += SpaceLeft;
+        BundleLockSavedCurBufferPtr += SpaceLeft;
+        TotalPadding = SpaceLeft;
+        SpaceLeft = kBundleSize;
+      }
+
+      if (AlignNextGroup == kEnd) {
+        DEBUG(dbgs() << "alignend, space len " << SpaceLeft << " " << GroupLen
+                     << "\n");
+        int MoveDistance = SpaceLeft - GroupLen;
+        memmove(BundleLockSavedCurBufferPtr + MoveDistance,
+                BundleLockSavedCurBufferPtr, GroupLen);
+        memcpy(BundleLockSavedCurBufferPtr,
+               JITInfo->getNopSequence(MoveDistance), MoveDistance);
+        NumNopBytes += MoveDistance;
+        CurBufferPtr += MoveDistance;
+        TotalPadding += MoveDistance;
+      }
+
+      AlignNextGroup = kNone;
+
+      assert(CurBufferPtr <= BufferEnd && "Bundled group caused buf overflow");
+      if (TotalPadding && GroupRelocationCount) {
+        assert(Relocations.size() >= GroupRelocationCount &&
+               "Too many relocations recorded for this group");
+        for (std::vector<MachineRelocation>::reverse_iterator I =
+               Relocations.rbegin(); GroupRelocationCount > 0;
+             ++I, GroupRelocationCount--) {
+          int NewOffset = I->getMachineCodeOffset() + TotalPadding;
+          I->setMachineCodeOffset(NewOffset);
+        }
+      }
+    }
+
+    virtual void alignToBundleBeginning() {
+      // Mark that the next locked group must be aligned to a bundle start
+      // (e.g. an indirect branch target).
+      assert(AlignNextGroup == kNone && "Conflicting group alignments");
+      AlignNextGroup = kBegin;
+    }
+
+    virtual void alignToBundleEnd() {
+      // Mark that the next locked group must be aligned to a bundle end
+      // (e.g. a call).
+      assert(AlignNextGroup == kNone && "Conflicting group alignments");
+      AlignNextGroup = kEnd;
+    }
+
+    virtual uintptr_t getCurrentPCValue() const {
+      // Return the destination PC value rather than the generating location.
+      if (BufferBegin == EmissionBuffer) {
+        return (uintptr_t)(FunctionDestination + (CurBufferPtr - BufferBegin));
+      } else if (BufferBegin == StubEmissionBuffer) {
+        return (uintptr_t)(StubDestination + (CurBufferPtr - BufferBegin));
+      } else {
+        return (uintptr_t)CurBufferPtr;
+      }
+    }
+
+    // addRelocation gets called in the middle of emitting an instruction, and
+    // creates the relocation based on the instruction's current position in
+    // the emission buffer; however, the instruction could still be moved if
+    // it crosses a bundle boundary, so we intercept relocation creation and
+    // adjust newly-created relocations in endBundleLock if necessary.
+    virtual void addRelocation(const MachineRelocation &MR) {
+      GroupRelocationCount++;
+      JITEmitter::addRelocation(MR);
+    }
+
+  private:
+    typedef enum _GroupAlign { kNone, kBegin, kEnd } GroupAlign;
+    // FunctionDestination points to the final destination for the function
+    // (i.e. where it will be copied after validation).
+    uint8_t *FunctionDestination;
+    uint8_t *BundleLockSavedCurBufferPtr;
+    int BundleNestCount; // should not exceed 2
+    GroupAlign AlignNextGroup;
+    unsigned GroupRelocationCount;
+    uint8_t *EmissionBuffer;
+    uintptr_t EmissionBufferSize;
+
+    bool ReusedStub;
+    uint8_t *StubDestination;
+    uint8_t *StubEmissionBuffer;
+    uintptr_t StubEmissionBufferSize;
+
+    TargetJITInfo *JITInfo;
+    const int kBundleSize;
+    const int32_t kJumpMask;
+
+    // Set the buffer pointers (begin, cur, end) so they point into the buffer
+    // at dest, preserving their relative positions.
+    void setBufferPtrs(uint8_t* dest) {
+      BufferEnd = dest + (BufferEnd - BufferBegin);
+      CurBufferPtr = dest + (CurBufferPtr - BufferBegin);
+      BufferBegin = dest;
+    }
+  };
}

void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
@@ -934,6 +1292,12 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
  // Mark code region readable and executable if it's not so already.
  MemMgr->setMemoryExecutable();

+  // @LOCALMOD-START
+#ifndef __native_client__
+  // In NaCl, we haven't yet validated and copied the function code to its
+  // destination, so there is nothing to disassemble. Furthermore, we can't
+  // touch the destination because it may not even be mapped yet.
+  // @LOCALMOD-END
  DEBUG({
    if (sys::hasDisassembler()) {
      dbgs() << "JIT: Disassembled code:\n";
@@ -963,6 +1327,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
      dbgs()<< '\n';
    }
  });
+#endif // @LOCALMOD

  if (JITExceptionHandling) {
    uintptr_t ActualSize = 0;
@@ -1247,7 +1612,14 @@ void JITEmitter::EmittedFunctionConfig::onRAUW(

JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
                                   TargetMachine &tm) {
+// @LOCALMOD-START
+#ifndef __native_client__
  return new JITEmitter(jit, JMM, tm);
+#else
+  assert(!JMM && "NaCl does not support custom memory managers");
+  return new NaClJITEmitter(jit, tm);
+#endif
+// @LOCALMOD-END
}

// getPointerToFunctionOrStub - If the specified function has been
diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt
index e22b8cd406..f7f814b9cb 100644
--- a/lib/LLVMBuild.txt
+++ b/lib/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;

[common]
-subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore
+subdirectories = Analysis Archive AsmParser Bitcode CodeGen DebugInfo ExecutionEngine Linker MC Object Support TableGen Target Transforms VMCore Wrap

[component_0]
type = Group
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index c16d1958cd..c5656a54c9 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -16,10 +16,24 @@
#include "llvm/Module.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/Bitcode/Archive.h"
+
+#include "llvm/Support/CommandLine.h" // @LOCALMOD
+
#include <memory>
#include <set>
using namespace llvm;

+// @LOCALMOD-START
+// NOTE: this has a similar effect to
+//          tools/llvm/llvm-preserve.ll
+//       which in turn is similar to GNU's attribute((used))
+// TODO(robertm): This is a little hackish for now
+static cl::list<std::string>
+UndefList("referenced-list", cl::value_desc("list"),
+          cl::desc("A list of symbols assumed to be referenced externally"),
+          cl::CommaSeparated);
+// @LOCALMOD-END
+
/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
/// exist in an LLVM module.
This is a bit tricky because there may be two /// symbols with the same name but different LLVM types that will be resolved to @@ -36,7 +50,10 @@ static void GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) { std::set<std::string> DefinedSymbols; UndefinedSymbols.clear(); - + // @LOCALMOD-START + UndefinedSymbols.insert(UndefList.begin(), UndefList.end()); + // @LOCALMOD-END + // If the program doesn't define a main, try pulling one in from a .a file. // This is needed for programs where the main function is defined in an // archive, such f2c'd programs. diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 7293f3d0e8..aec2547f00 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -932,6 +932,19 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { ValueMap[I] = DI; } + // @LOCALMOD-BEGIN + // Local patch for http://llvm.org/bugs/show_bug.cgi?id=11112 + // and http://llvm.org/bugs/show_bug.cgi?id=10887 + // Create an identity mapping for instructions so that alloca instructions + // do not get dropped and related debug info isn't lost. E.g., prevent + // call @llvm.dbg.declare(metadata !{i32 * %local_var}, ...) + // from becoming + // call @llvm.dbg.declare(null, ...) + for (Function::iterator BB = Src->begin(), BE = Src->end(); BB != BE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + ValueMap[I] = I; + // @LOCALMOD-END + if (Mode == Linker::DestroySource) { // Splice the body of the source function into the dest function. Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList()); @@ -949,6 +962,13 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { SmallVector<ReturnInst*, 8> Returns; // Ignore returns. CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap); } + + // @LOCALMOD-BEGIN + // There is no need for the identity mapping anymore. + for (Function::iterator BB = Src->begin(), BE = Src->end(); BB != BE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + ValueMap.erase(I); + // @LOCALMOD-END // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 7b166fb56c..6d6d23a893 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -667,7 +667,12 @@ const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm, if (&Sec2 != &Section && (Kind == MCSymbolRefExpr::VK_PLT || Kind == MCSymbolRefExpr::VK_GOTPCREL || - Kind == MCSymbolRefExpr::VK_GOTOFF)) { + Kind == MCSymbolRefExpr::VK_GOTOFF || + // @LOCALMOD-BEGIN-IS-UPSTREAM + // Fixes an LLVM bug. This bug has already been fixed upstream + // and should disappear on the next merge. 
+       Kind == MCSymbolRefExpr::VK_NTPOFF)) {
+      // @LOCALMOD-END
    if (Renamed)
      return Renamed;
    return &Symbol;
@@ -1552,4 +1557,5 @@ MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
                                            raw_ostream &OS,
                                            bool IsLittleEndian) {
  return new ELFObjectWriter(MOTW, OS, IsLittleEndian);
+
}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index d9c1d51d18..9f98cb32e9 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -24,6 +24,7 @@ using namespace llvm;

MCAsmInfo::MCAsmInfo() {
  PointerSize = 4;
+  StackSlotSize = 4; // @LOCALMOD
  IsLittleEndian = true;
  StackGrowsUp = false;
  HasSubsectionsViaSymbols = false;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 1b7d037568..1371396632 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -205,6 +205,13 @@ public:
  virtual bool EmitValueToOffset(const MCExpr *Offset,
                                 unsigned char Value = 0);

+  // @LOCALMOD-BEGIN
+  virtual void EmitBundleLock();
+  virtual void EmitBundleUnlock();
+  virtual void EmitBundleAlignStart();
+  virtual void EmitBundleAlignEnd();
+  // @LOCALMOD-END
+
  virtual void EmitFileDirective(StringRef Filename);
  virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
                                      StringRef Filename);
@@ -776,6 +783,27 @@ bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
  return false;
}

+// @LOCALMOD-BEGIN
+void MCAsmStreamer::EmitBundleLock() {
+  OS << "\t.bundle_lock";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleUnlock() {
+  OS << "\t.bundle_unlock";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleAlignStart() {
+  OS << "\t.bundle_align_start";
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitBundleAlignEnd() {
+  OS << "\t.bundle_align_end";
+  EmitEOL();
+}
+// @LOCALMOD-END
+
void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
  assert(MAI.hasSingleParameterDotFile());
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 66ba9b81f3..a914e12cdc 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -70,6 +70,26 @@ bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
}

void MCAsmLayout::Invalidate(MCFragment *F) {
+  // @LOCALMOD-BEGIN
+  if (F->getParent()->isBundlingEnabled()) {
+    // If this fragment is part of a bundle-locked group,
+    // we need to invalidate all the way to the first fragment
+    // in the group.
+    while (F && !F->isBundleGroupStart())
+      F = F->getPrevNode();
+    assert(F);
+    // With padding enabled, we need to invalidate back one
+    // fragment further in order to force the recalculation
+    // of the padding and offset.
+    if (F->getPrevNode()) {
+      F = F->getPrevNode();
+    } else {
+      LastValidFragment[F->getParent()] = NULL;
+      return;
+    }
+  }
+  // @LOCALMOD-END
+
  // If this fragment wasn't already up-to-date, we don't need to do anything.
  if (!isFragmentUpToDate(F))
    return;
@@ -132,6 +152,15 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
  assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
  return getFragmentOffset(SD->getFragment()) + SD->getOffset();
}
+
+// @LOCALMOD-BEGIN
+uint8_t MCAsmLayout::getFragmentPadding(const MCFragment *F) const {
+  EnsureValid(F);
+  assert(F->BundlePadding != (uint8_t)~UINT8_C(0) && "Padding not set!");
+  return F->BundlePadding;
+}
+// @LOCALMOD-END
+
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
  // The size is the last fragment's end offset.
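The bundle padding recorded by the new layout code is pure address arithmetic. A minimal standalone sketch of the helper the next hunk introduces, with a worked example (the 32-byte bundle size is illustrative; the real value comes from the target backend via getBundleSize()):

    #include <cassert>
    #include <stdint.h>

    // Bytes of nop padding needed to advance Address to the next bundle
    // boundary; BundleMask is BundleSize - 1 for a power-of-2 bundle size.
    static uint64_t AddressToBundlePadding(uint64_t Address,
                                           uint64_t BundleMask) {
      return (~Address + 1) & BundleMask;
    }

    int main() {
      const uint64_t BundleMask = 32 - 1;
      assert(AddressToBundlePadding(29, BundleMask) == 3); // pad 29 up to 32
      assert(AddressToBundlePadding(32, BundleMask) == 0); // already aligned
      return 0;
    }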
@@ -157,10 +186,31 @@ MCFragment::~MCFragment() { } MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) - : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)) + : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)), + // @LOCALMOD-BEGIN + BundleAlign(BundleAlignNone), + BundleGroupStart(false), + BundleGroupEnd(false), + BundlePadding(~UINT8_C(0)) + // @LOCALMOD-END { if (Parent) Parent->getFragmentList().push_back(this); + + // @LOCALMOD-BEGIN + if (Parent && Parent->isBundlingEnabled()) { + BundleAlign = Parent->getBundleAlignNext(); + Parent->setBundleAlignNext(MCFragment::BundleAlignNone); + if (Parent->isBundleLocked()) { + BundleGroupStart = Parent->isBundleGroupFirstFrag(); + BundleGroupEnd = false; + Parent->setBundleGroupFirstFrag(false); + } else { + BundleGroupStart = true; + BundleGroupEnd = true; + } + } + // @LOCALMOD-END } /* *** */ @@ -171,10 +221,24 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) : Section(&_Section), Ordinal(~UINT32_C(0)), Alignment(1), - HasInstructions(false) + HasInstructions(false), +// @LOCALMOD-BEGIN + BundlingEnabled(false), + BundleLocked(false), + BundleGroupFirstFrag(false), + BundleAlignNext(MCFragment::BundleAlignNone) +// @LOCALMOD-END { if (A) A->getSectionList().push_back(this); + + // @LOCALMOD-BEGIN + unsigned BundleSize = A->getBackend().getBundleSize(); + if (BundleSize && _Section.UseCodeAlign()) { + BundlingEnabled = true; + setAlignment(BundleSize); + } + // @LOCALMOD-END } /* *** */ @@ -319,7 +383,10 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_LEB: return cast<MCLEBFragment>(F).getContents().size(); - +// @LOCALMOD-BEGIN + case MCFragment::FT_Tiny: + return cast<MCTinyFragment>(F).getContents().size(); +// @LOCALMOD-END case MCFragment::FT_Align: { const MCAlignFragment &AF = cast<MCAlignFragment>(F); unsigned Offset = Layout.getFragmentOffset(&AF); @@ -369,15 +436,139 @@ void MCAsmLayout::LayoutFragment(MCFragment *F) { uint64_t Offset = 0; if (Prev) Offset += Prev->Offset + getAssembler().computeFragmentSize(*this, *Prev); - + // @LOCALMOD-BEGIN + F->BundlePadding = getAssembler().ComputeBundlePadding(*this, F, Offset); + Offset += F->BundlePadding; + // @LOCALMOD-END F->Offset = Offset; LastValidFragment[F->getParent()] = F; } +// @LOCALMOD-BEGIN +// Returns number of bytes of padding needed to align to bundle start. 
+static uint64_t AddressToBundlePadding(uint64_t Address, uint64_t BundleMask) { + return (~Address + 1) & BundleMask; +} + +uint64_t MCAssembler::getBundleSize() const { + return getBackend().getBundleSize(); +} + +uint64_t MCAssembler::getBundleMask() const { + uint64_t BundleSize = getBundleSize(); + uint64_t BundleMask = BundleSize - 1; + assert(BundleSize != 0); + assert((BundleSize & BundleMask) == 0 && + "Bundle size must be a power of 2!"); + return BundleMask; +} + +static unsigned ComputeGroupSize(MCFragment *F) { + if (!F->isBundleGroupStart()) { + return 0; + } + + unsigned GroupSize = 0; + MCFragment *Cur = F; + while (Cur) { + switch (Cur->getKind()) { + default: llvm_unreachable("Unexpected fragment type in bundle!"); + case MCFragment::FT_Align: + case MCFragment::FT_Org: + case MCFragment::FT_Fill: + if (Cur == F && Cur->isBundleGroupEnd()) { + return 0; + } + llvm_unreachable(".bundle_lock cannot contain .align, .org, or .fill"); + case MCFragment::FT_Inst: + GroupSize += cast<MCInstFragment>(Cur)->getInstSize(); + break; + case MCFragment::FT_Data: + GroupSize += cast<MCDataFragment>(Cur)->getContents().size(); + break; + case MCFragment::FT_Tiny: + GroupSize += cast<MCTinyFragment>(Cur)->getContents().size(); + break; + } + if (Cur->isBundleGroupEnd()) + break; + Cur = Cur->getNextNode(); + } + return GroupSize; +} + +uint8_t MCAssembler::ComputeBundlePadding(const MCAsmLayout &Layout, + MCFragment *F, + uint64_t FragmentOffset) const { + if (!F->getParent()->isBundlingEnabled()) + return 0; + + uint64_t BundleSize = getBundleSize(); + uint64_t BundleMask = getBundleMask(); + unsigned GroupSize = ComputeGroupSize(F); + assert(GroupSize <= BundleSize && + "Bundle lock contents too large!"); + + uint64_t Padding = 0; + uint64_t OffsetInBundle = FragmentOffset & BundleMask; + + if (OffsetInBundle + GroupSize > BundleSize || + F->getBundleAlign() == MCFragment::BundleAlignStart) { + // Pad up to start of the next bundle + Padding += AddressToBundlePadding(OffsetInBundle, BundleMask); + OffsetInBundle = 0; + } + if (F->getBundleAlign() == MCFragment::BundleAlignEnd) { + // Push to the end of the bundle + Padding += AddressToBundlePadding(OffsetInBundle + GroupSize, BundleMask); + } + return Padding; +} +// @LOCALMOD-END + + + + +// @LOCALMOD-BEGIN +// Write out BundlePadding bytes in NOPs, being careful not to cross a bundle +// boundary. +static void WriteBundlePadding(const MCAssembler &Asm, + const MCAsmLayout &Layout, + uint64_t Offset, uint64_t TotalPadding, + MCObjectWriter *OW) { + uint64_t BundleSize = Asm.getBundleSize(); + uint64_t BundleMask = Asm.getBundleMask(); + uint64_t PaddingLeft = TotalPadding; + uint64_t StartPos = Offset; + + bool FirstWrite = true; + while (PaddingLeft > 0) { + uint64_t NopsToWrite = + FirstWrite ? AddressToBundlePadding(StartPos, BundleMask) : + BundleSize; + if (NopsToWrite > PaddingLeft) + NopsToWrite = PaddingLeft; + if (!Asm.getBackend().writeNopData(NopsToWrite, OW)) + report_fatal_error("unable to write nop sequence of " + + Twine(NopsToWrite) + " bytes"); + PaddingLeft -= NopsToWrite; + FirstWrite = false; + } +} +// @LOCALMOD-END + /// WriteFragmentData - Write the \arg F data to the output file. 
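+/// With bundling enabled, the fragment's bundle padding is written out (as
+/// nops) before the fragment's own bytes.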
static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
                              const MCFragment &F) {
  MCObjectWriter *OW = &Asm.getWriter();
+  // @LOCALMOD-BEGIN
+  if (F.getParent()->isBundlingEnabled()) {
+    uint64_t BundlePadding = Layout.getFragmentPadding(&F);
+    uint64_t PaddingOffset = Layout.getFragmentOffset(&F) - BundlePadding;
+    WriteBundlePadding(Asm, Layout, PaddingOffset, BundlePadding, OW);
+  }
+  // @LOCALMOD-END
+
  uint64_t Start = OW->getStream().tell();
  (void) Start;
@@ -406,6 +597,16 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
    // bytes left to fill, use the Value and ValueSize to fill the rest.
    // If we are aligning with nops, ask that target to emit the right data.
    if (AF.hasEmitNops()) {
+      // @LOCALMOD-BEGIN
+      if (Asm.getBundleSize()) {
+        WriteBundlePadding(Asm, Layout,
+                           Layout.getFragmentOffset(&F),
+                           FragmentSize,
+                           OW);
+        break;
+      }
+      // @LOCALMOD-END
+
      if (!Asm.getBackend().writeNopData(Count, OW))
        report_fatal_error("unable to write nop sequence of " +
                           Twine(Count) + " bytes");
@@ -432,6 +633,15 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
    break;
  }

+  // @LOCALMOD-BEGIN
+  case MCFragment::FT_Tiny: {
+    MCTinyFragment &TF = cast<MCTinyFragment>(F);
+    assert(FragmentSize == TF.getContents().size() && "Invalid size!");
+    OW->WriteBytes(TF.getContents().str());
+    break;
+  }
+  // @LOCALMOD-END
+
  case MCFragment::FT_Fill: {
    MCFillFragment &FF = cast<MCFillFragment>(F);
@@ -836,6 +1046,9 @@ void MCFragment::dump() {
  case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
  case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
  case MCFragment::FT_LEB: OS << "MCLEBFragment"; break;
+  // @LOCALMOD-BEGIN
+  case MCFragment::FT_Tiny: OS << "MCTinyFragment"; break;
+  // @LOCALMOD-END
  }

  OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
@@ -888,6 +1101,20 @@ void MCFragment::dump() {
    IF->getInst().dump_pretty(OS);
    break;
  }
+  // @LOCALMOD-BEGIN
+  case MCFragment::FT_Tiny: {
+    const MCTinyFragment *TF = cast<MCTinyFragment>(this);
+    OS << "\n       ";
+    OS << " Contents:[";
+    const SmallVectorImpl<char> &Contents = TF->getContents();
+    for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+      if (i) OS << ",";
+      OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+    }
+    OS << "] (" << Contents.size() << " bytes)";
+    break;
+  }
+  // @LOCALMOD-END
  case MCFragment::FT_Org: {
    const MCOrgFragment *OF = cast<MCOrgFragment>(this);
    OS << "\n       ";
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 91864fb7a8..52d4ab5e70 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -774,7 +774,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
static int getDataAlignmentFactor(MCStreamer &streamer) {
  MCContext &context = streamer.getContext();
  const MCAsmInfo &asmInfo = context.getAsmInfo();
-  int size = asmInfo.getPointerSize();
+  int size = asmInfo.getStackSlotSize(); // @LOCALMOD
  if (asmInfo.isStackGrowthDirectionUp())
    return size;
  else
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index 6ac9d9d51c..39cc4eb415 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -466,7 +466,6 @@ void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
}

void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
-  MCDataFragment *DF = getOrCreateDataFragment();

  SmallVector<MCFixup, 4> Fixups;
  SmallString<256> Code;
@@ -477,12 +476,21 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
  for (unsigned i
= 0, e = Fixups.size(); i != e; ++i) fixSymbolsInTLSFixups(Fixups[i].getValue()); - // Add the fixups and data. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); - DF->addFixup(Fixups[i]); + // @LOCALMOD-BEGIN + if (Fixups.size() > 0) { + MCDataFragment *DF = getOrCreateDataFragment(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); + } else { + MCTinyFragment *TF = new MCTinyFragment(getCurrentSectionData()); + TF->getContents().append(Code.begin(), Code.end()); } - DF->getContents().append(Code.begin(), Code.end()); + // @LOCALMOD-END } void MCELFStreamer::FinishImpl() { diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index c9e2c56811..f669faad14 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -83,6 +83,13 @@ namespace { virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) { return false; } + // @LOCALMOD-BEGIN + virtual void EmitBundleLock() {} + virtual void EmitBundleUnlock() {} + virtual void EmitBundleAlignStart() {} + virtual void EmitBundleAlignEnd() {} + // @LOCALMOD-END + virtual void EmitFileDirective(StringRef Filename) {} virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename) { diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index bad7cfe38a..6964d12267 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" // @LOCALMOD #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -54,6 +55,11 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const { } MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { + // @LOCALMOD-BEGIN + if (getCurrentSectionData()->isBundlingEnabled()) { + return new MCDataFragment(getCurrentSectionData()); + } + // @LOCALMOD-END MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); if (!F) F = new MCDataFragment(getCurrentSectionData()); @@ -153,6 +159,55 @@ void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, report_fatal_error("This file format doesn't support weak aliases."); } +// @LOCALMOD-BEGIN ======================================================== + +void MCObjectStreamer::EmitBundleAlignStart() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_align_start called, but bundling disabled!"); + assert(!SD->isBundleLocked() && + ".bundle_align_start while bundle locked"); + SD->setBundleAlignNext(MCFragment::BundleAlignStart); +} + +void MCObjectStreamer::EmitBundleAlignEnd() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_align_end called, but bundling disabled!"); + assert(!SD->isBundleLocked() && + ".bundle_align_end while bundle locked"); + SD->setBundleAlignNext(MCFragment::BundleAlignEnd); +} + +void MCObjectStreamer::EmitBundleLock() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_lock called, but bundling disabled!"); + assert(!SD->isBundleLocked() && + ".bundle_lock issued when bundle already locked"); + SD->setBundleLocked(true); + 
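+  // The next fragment created in this section becomes the first fragment of
+  // the locked group; MCFragment's constructor consumes this flag to set its
+  // BundleGroupStart marker.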
SD->setBundleGroupFirstFrag(true); +} + +void MCObjectStreamer::EmitBundleUnlock() { + MCSectionData *SD = getCurrentSectionData(); + assert(SD->isBundlingEnabled() && + ".bundle_unlock called, but bundling disabled!"); + assert(SD->isBundleLocked() && + ".bundle_unlock called when bundle not locked"); + + // If there has been at least one fragment emitted inside + // this bundle lock, then we need to mark the last emitted + // fragment as the group end. + if (!SD->isBundleGroupFirstFrag()) { + assert(getCurrentFragment() != NULL); + getCurrentFragment()->setBundleGroupEnd(true); + } + SD->setBundleLocked(false); + SD->setBundleGroupFirstFrag(false); +} +// @LOCALMOD-END ========================================================== + void MCObjectStreamer::ChangeSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); @@ -160,6 +215,13 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section) { } void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { + + // @LOCALMOD-BEGIN + if (getAssembler().getBackend().CustomExpandInst(Inst, *this)) { + return; + } + // @LOCALMOD-END + // Scan for values. for (unsigned i = Inst.getNumOperands(); i--; ) if (Inst.getOperand(i).isExpr()) diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 04603e994a..eefb3e1ad4 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -240,6 +240,13 @@ private: // ".align{,32}", ".p2align{,w,l}" bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + // @LOCALMOD-BEGIN + bool ParseDirectiveBundleLock(); + bool ParseDirectiveBundleUnlock(); + bool ParseDirectiveBundleAlignStart(); + bool ParseDirectiveBundleAlignEnd(); + // @LOCALMOD-END + /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which /// accepts a single symbol (which should be a label or an external). bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr); @@ -518,6 +525,13 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { if (!NoInitialTextSection) Out.InitSections(); + // @LOCALMOD-BEGIN + // This is needed to make crtn compile, but do we really need this? + // TODO(pdox): Figure out if there's a better way or place to define this. + MCSymbol *Sym = getContext().GetOrCreateSymbol(StringRef("NACLENTRYALIGN")); + Out.EmitAssignment(Sym, MCConstantExpr::Create(5, getContext())); + // @LOCALMOD-END + // Prime the lexer. 
Lex(); @@ -1219,6 +1233,17 @@ bool AsmParser::ParseStatement() { if (IDVal == ".p2alignl") return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + // @LOCALMOD-BEGIN + if (IDVal == ".bundle_lock") + return ParseDirectiveBundleLock(); + if (IDVal == ".bundle_unlock") + return ParseDirectiveBundleUnlock(); + if (IDVal == ".bundle_align_start") + return ParseDirectiveBundleAlignStart(); + if (IDVal == ".bundle_align_end") + return ParseDirectiveBundleAlignEnd(); + // @LOCALMOD-END + if (IDVal == ".org") return ParseDirectiveOrg(); @@ -2161,6 +2186,50 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { return false; } +// @LOCALMOD-BEGIN +bool AsmParser::ParseDirectiveBundleLock() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_lock' directive"); + Lex(); + getStreamer().EmitBundleLock(); + return false; +} + +bool AsmParser::ParseDirectiveBundleUnlock() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_unlock' directive"); + Lex(); + getStreamer().EmitBundleUnlock(); + return false; +} + +bool AsmParser::ParseDirectiveBundleAlignStart() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_align_start' directive"); + Lex(); + getStreamer().EmitBundleAlignStart(); + return false; +} + +bool AsmParser::ParseDirectiveBundleAlignEnd() { + CheckForValidSection(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.bundle_align_end' directive"); + Lex(); + getStreamer().EmitBundleAlignEnd(); + return false; +} + +// @LOCALMOD-END + + /// ParseDirectiveSymbolAttribute /// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ] bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 0a44e7731b..e93b4de969 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -391,5 +391,11 @@ void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) { AddFeature("64bit"); AddFeature("altivec"); } +// @LOCALMOD-BEGIN + } else if (Triple.getArch() == Triple::arm && + Triple.getOS() == Triple::NativeClient) { + AddFeature("-neon"); + AddFeature("+vfp2"); +// @LOCALMOD-END } } diff --git a/lib/Makefile b/lib/Makefile index fd575cd195..c59d77d009 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -11,7 +11,12 @@ LEVEL = .. include $(LEVEL)/Makefile.config PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \ - Target ExecutionEngine Linker MC Object DebugInfo + Target ExecutionEngine Linker MC Object Wrap DebugInfo + +ifeq ($(NACL_SANDBOX),1) + PARALLEL_DIRS := $(filter-out Archive Linker, \ + $(PARALLEL_DIRS)) +endif include $(LEVEL)/Makefile.common diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index e175056279..508bec4028 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -267,6 +267,7 @@ void CrashRecoveryContext::Enable() { gCrashRecoveryEnabled = true; +#if !defined(__native_client__) // Setup the signal handler. 
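+  // (Under NaCl, sigaction() is unavailable in the sandbox, so crash
+  // recovery runs without signal handlers; see the #warning below.)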
  struct sigaction Handler;
  Handler.sa_handler = CrashRecoverySignalHandler;
@@ -276,6 +277,9 @@ void CrashRecoveryContext::Enable() {
  for (unsigned i = 0; i != NumSignals; ++i) {
    sigaction(Signals[i], &Handler, &PrevActions[i]);
  }
+#else
+#warning Cannot set up the signal handler on this machine
+#endif
}

void CrashRecoveryContext::Disable() {
@@ -286,9 +290,11 @@ void CrashRecoveryContext::Disable() {

  gCrashRecoveryEnabled = false;

+#if !defined(__native_client__)
  // Restore the previous signal handlers.
  for (unsigned i = 0; i != NumSignals; ++i)
    sigaction(Signals[i], &PrevActions[i], 0);
+#endif
}

#endif
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index fb02c07e4a..41ddc3a3bc 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -187,3 +187,4 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
}

#endif // LLVM_ON_WIN32
+
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index 64404a1a8e..d2cb5c969e 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -19,7 +19,7 @@
#include <unistd.h>
#endif
using namespace llvm;
-
+#ifndef __native_client__
/// \brief Attempt to read the lock file with the given name, if it exists.
///
/// \param LockFileName The name of the lock file to read.
@@ -214,3 +214,5 @@ void LockFileManager::waitForUnlock() {
  // Give up.
}
+
+#endif
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 16e5c7a9f7..90672b68f7 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -231,7 +231,7 @@ error_code MemoryBuffer::getFile(const char *Filename,
static bool shouldUseMmap(int FD,
                          size_t FileSize,
                          size_t MapSize,
-                          off_t Offset,
+                          int64_t Offset,
                          bool RequiresNullTerminator,
                          int PageSize) {
  // We don't use mmap for small files because this can severely fragment our
@@ -243,6 +243,10 @@ static bool shouldUseMmap(int FD,
    return true;

+// LLVM uses mmap to read the file contents. This disallows use of the
+// wrapper syscalls defined in tools/llc/nacl_file.c. Thus, when NACL_SRPC
+// is specified, the code sequence exercising the read syscall below is used.
+#if !defined(NACL_SRPC)
  // If we don't know the file size, use fstat to find out.  fstat on an open
  // file descriptor is cheaper than stat on a random path.
  // FIXME: this chunk of code is duplicated, but it avoids a fstat when
@@ -255,6 +259,9 @@ static bool shouldUseMmap(int FD,
    }
    FileSize = FileInfo.st_size;
  }
+#else
+  assert(FileSize != -1 && "invalid file size!");
+#endif

  // If we need a null terminator and the end of the map is inside the file,
  // we cannot use mmap.
@@ -282,6 +289,7 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
  if (MapSize == uint64_t(-1)) {
    // If we don't know the file size, use fstat to find out.  fstat on an open
    // file descriptor is cheaper than stat on a random path.
+#if !defined(NACL_SRPC)
    if (FileSize == uint64_t(-1)) {
      struct stat FileInfo;
      // TODO: This should use fstat64 when available.
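A consequence worth spelling out: under NACL_SRPC there is no fstat on the wrapped descriptors, so the file size must be supplied by the caller. A minimal hypothetical caller (the function name and size are invented for illustration; the getOpenFile parameters are the ones in this tree):

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Support/MemoryBuffer.h"
    using namespace llvm;

    // In sandboxed (NACL_SRPC) builds KnownSize must be supplied up front,
    // since the code above cannot fstat the wrapped file descriptor.
    error_code readInput(int FD, uint64_t KnownSize,
                         OwningPtr<MemoryBuffer> &Buf) {
      return MemoryBuffer::getOpenFile(FD, "input.bc", Buf, KnownSize);
    }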
@@ -290,13 +298,16 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
      }
      FileSize = FileInfo.st_size;
    }
+#else
+    assert(FileSize != -1 && "invalid file size!");
+#endif
    MapSize = FileSize;
  }

  if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
                    PageSize)) {
-    off_t RealMapOffset = Offset & ~(PageSize - 1);
-    off_t Delta = Offset - RealMapOffset;
+    int64_t RealMapOffset = Offset & ~(PageSize - 1);
+    int64_t Delta = Offset - RealMapOffset;
    size_t RealMapSize = MapSize + Delta;

    if (const char *Pages = sys::Path::MapInFilePages(FD,
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index da5baab4be..e2ed1d2e7c 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -59,7 +59,7 @@ MutexImpl::MutexImpl( bool recursive)
  errorcode = pthread_mutexattr_settype(&attr, kind);
  assert(errorcode == 0);

-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) && !defined(__native_client__)
  // Make it a process local mutex
  errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
  assert(errorcode == 0);
diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc
index 726e2fbcf0..aa06763258 100644
--- a/lib/Support/Unix/Host.inc
+++ b/lib/Support/Unix/Host.inc
@@ -19,7 +19,9 @@
#include "llvm/Config/config.h"
#include "llvm/ADT/StringRef.h"
#include "Unix.h"
+#if !defined(__native_client__)
#include <sys/utsname.h>
+#endif // (__native_client__)
#include <cctype>
#include <string>
#include <cstdlib> // ::getenv
@@ -27,12 +29,16 @@
using namespace llvm;

static std::string getOSVersion() {
+#if !defined(__native_client__)
  struct utsname info;

  if (uname(&info))
    return "";

  return info.release;
+#else // (__native_client__)
+  return "";
+#endif // (__native_client__)
}

std::string sys::getDefaultTargetTriple() {
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 5a57a28706..13c35e8aa8 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//

#include "Unix.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/Debug.h"

#ifdef HAVE_SYS_MMAN_H
#include <sys/mman.h>
@@ -61,8 +63,10 @@ llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
  void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC,
                    flags, fd, 0);
#else
+  DEBUG(dbgs() << "calling mmap, start " << start << "\n");
  void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC,
                    flags, fd, 0);
+  DEBUG(dbgs() << "mmap returned " << pa << "\n");
#endif
  if (pa == MAP_FAILED) {
    if (NearBlock) //Try again without a near hint
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index ddc1e0f9ce..9857674a0b 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -133,7 +133,9 @@ Path::GetRootDirectory() {

Path
Path::GetTemporaryDirectory(std::string *ErrMsg) {
-#if defined(HAVE_MKDTEMP)
+#if defined(__native_client__)
+  return Path("");
+#elif defined(HAVE_MKDTEMP)
  // The best way is with mkdtemp, but that's not available on many systems;
  // Linux and FreeBSD have it. Others probably won't.
char pathname[] = "/tmp/llvm_XXXXXX"; @@ -251,6 +253,7 @@ Path::GetUserHomeDirectory() { Path Path::GetCurrentDirectory() { +#if !defined(__native_client__) char pathname[MAXPATHLEN]; if (!getcwd(pathname, MAXPATHLEN)) { assert(false && "Could not query current working directory."); @@ -258,6 +261,9 @@ Path::GetCurrentDirectory() { } return Path(pathname); +#else // (__native_client__) + return Path("./"); +#endif // (__native_client__) } #if defined(__FreeBSD__) || defined (__NetBSD__) || \ @@ -318,7 +324,9 @@ getprogpath(char ret[PATH_MAX], const char *bin) /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { -#if defined(__APPLE__) +#if defined(__native_client__) + return Path(std::string("./") + std::string(argv0)); +#elif defined(__APPLE__) // On OS X the executable path is saved to the stack by dyld. Reading it // from there is much faster than calling dladdr, especially for large // binaries with symbols. @@ -411,7 +419,11 @@ bool Path::getMagicNumber(std::string &Magic, unsigned len) const { bool Path::exists() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), F_OK ); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool @@ -424,21 +436,33 @@ Path::isDirectory() const { bool Path::isSymLink() const { +#if defined(__native_client__) + return false; +#else struct stat buf; if (0 != lstat(path.c_str(), &buf)) return false; return S_ISLNK(buf.st_mode); +#endif } bool Path::canRead() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), R_OK); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool Path::canWrite() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), W_OK); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool @@ -457,6 +481,7 @@ Path::isRegularFile() const { bool Path::canExecute() const { +#if !defined(__native_client__) if (0 != access(path.c_str(), R_OK | X_OK )) return false; struct stat buf; @@ -464,6 +489,7 @@ Path::canExecute() const { return false; if (!S_ISREG(buf.st_mode)) return false; +#endif // (__native_client__) return true; } @@ -511,6 +537,7 @@ PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { } static bool AddPermissionBits(const Path &File, int bits) { +#if !defined(__native_client__) // Get the umask value from the operating system. We want to use it // when changing the file's permissions. Since calling umask() sets // the umask and returns its old value, we must call it a second @@ -526,6 +553,7 @@ static bool AddPermissionBits(const Path &File, int bits) { // that the umask would not disable. 
if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1) return false; +#endif // (__native_client__) return true; } @@ -549,6 +577,7 @@ bool Path::makeExecutableOnDisk(std::string* ErrMsg) { bool Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { +#if !defined(__native_client__) DIR* direntries = ::opendir(path.c_str()); if (direntries == 0) return MakeErrMsg(ErrMsg, path + ": can't open directory"); @@ -574,6 +603,7 @@ Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { } closedir(direntries); +#endif return false; } @@ -626,7 +656,7 @@ Path::eraseSuffix() { } static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { - +#if !defined(__native_client__) if (access(beg, R_OK | W_OK) == 0) return false; @@ -651,6 +681,9 @@ static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { } return mkdir(beg, S_IRWXU | S_IRWXG) != 0; +#else // (__native_client__) + return false; +#endif // (__native_client__) } bool @@ -674,11 +707,13 @@ Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) { bool Path::createFileOnDisk(std::string* ErrMsg) { +#if !defined(__native_client__) // Create the file int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR); if (fd < 0) return MakeErrMsg(ErrMsg, path + ": can't create file"); ::close(fd); +#endif // (__native_client__) return false; } @@ -698,6 +733,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { bool Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { +#if !defined(__native_client__) // Get the status so we can determine if it's a file or directory. struct stat buf; if (0 != stat(path.c_str(), &buf)) { @@ -742,18 +778,26 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { if (rmdir(pathname.c_str()) != 0) return MakeErrMsg(ErrStr, pathname + ": can't erase directory"); return false; +#else // (__native_client__) + MakeErrMsg(ErrStr, ": PNACL does not know how to erase directories!"); + return false; +#endif // (__native_client__) + } bool Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { +#if !defined(__native_client__) if (0 != ::rename(path.c_str(), newName.c_str())) return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" + newName.str() + "'"); +#endif return false; } bool Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { +#if !defined(__native_client__) struct utimbuf utb; utb.actime = si.modTime.toPosixTime(); utb.modtime = utb.actime; @@ -761,6 +805,7 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { return MakeErrMsg(ErrStr, path + ": can't set file modification time"); if (0 != ::chmod(path.c_str(),si.mode)) return MakeErrMsg(ErrStr, path + ": can't set mode"); +#endif // (__native_client__) return false; } diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index a5630b9ec9..4e71b42be9 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -108,7 +108,9 @@ error_code current_path(SmallVectorImpl<char> &result) { // For GNU Hurd result.reserve(1024); #endif - +#ifdef __native_client__ + llvm_unreachable("current_path() not implemented for Native Client"); +#else while (true) { if (::getcwd(result.data(), result.capacity()) == 0) { // See if there was a real error. 
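Every stubbed-out operation in the PathV2 file below follows the same shape; a minimal sketch of the pattern (the function name is hypothetical). The design choice differs deliberately from Path.inc above, which returns permissive defaults (e.g. exists() is always true), whereas PathV2.inc fails fast:

    #include "llvm/Support/ErrorHandling.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;

    error_code some_fs_operation(/* arguments as upstream */) {
    #ifdef __native_client__
      // No usable file system inside the sandbox: abort loudly rather than
      // pretend the operation succeeded.
      llvm_unreachable("some_fs_operation() not implemented for Native Client");
    #else
      // ... unchanged POSIX implementation ...
      return error_code::success();
    #endif
    }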
@@ -121,6 +123,7 @@ error_code current_path(SmallVectorImpl<char> &result) { } result.set_size(strlen(result.data())); +#endif return error_code::success(); } @@ -184,6 +187,9 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) { } error_code create_directory(const Twine &path, bool &existed) { +#ifdef __native_client__ + llvm_unreachable("create_directory() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -195,9 +201,13 @@ error_code create_directory(const Twine &path, bool &existed) { existed = false; return error_code::success(); +#endif } error_code create_hard_link(const Twine &to, const Twine &from) { +#ifdef __native_client__ + llvm_unreachable("create_hard_link() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -208,9 +218,13 @@ error_code create_hard_link(const Twine &to, const Twine &from) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code create_symlink(const Twine &to, const Twine &from) { +#ifdef __native_client__ + llvm_unreachable("create_symlink() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -221,9 +235,13 @@ error_code create_symlink(const Twine &to, const Twine &from) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code remove(const Twine &path, bool &existed) { +#ifdef __native_client__ + llvm_unreachable("remove() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -233,11 +251,14 @@ error_code remove(const Twine &path, bool &existed) { existed = false; } else existed = true; - return error_code::success(); +#endif } error_code rename(const Twine &from, const Twine &to) { +#ifdef __native_client__ + llvm_unreachable("rename() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -257,9 +278,13 @@ error_code rename(const Twine &from, const Twine &to) { } return error_code::success(); +#endif } error_code resize_file(const Twine &path, uint64_t size) { +#ifdef __native_client__ + llvm_unreachable("resize_file() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -267,6 +292,7 @@ error_code resize_file(const Twine &path, uint64_t size) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code exists(const Twine &path, bool &result) { @@ -350,6 +376,9 @@ error_code status(const Twine &path, file_status &result) { error_code unique_file(const Twine &model, int &result_fd, SmallVectorImpl<char> &result_path, bool makeAbsolute, unsigned mode) { +#ifdef __native_client__ + llvm_unreachable("unique_file() not implemented for Native Client"); +#else SmallString<128> Model; model.toVector(Model); // Null terminate. 
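PathV2.inc takes the opposite tack: rather than permissive no-ops, each mutating operation traps via llvm_unreachable(), so a sandboxed tool that reaches one of these paths aborts loudly instead of silently pretending to succeed. Every stub in these hunks has the same shape; a condensed sketch:

error_code create_symlink(const Twine &to, const Twine &from) {
#ifdef __native_client__
  llvm_unreachable("create_symlink() not implemented for Native Client");
#else
  // ... unchanged POSIX implementation ...
  return error_code::success();
#endif
}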
@@ -423,6 +452,7 @@ rety_open_create:
result_fd = RandomFD;
return error_code::success();
+#endif
}
error_code detail::directory_iterator_construct(detail::DirIterState &it,
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 4e1bd5db14..cbd9d41ce7 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -35,6 +35,8 @@
# include <termios.h>
#endif
+#include <sys/unistd.h>
+
//===----------------------------------------------------------------------===//
//=== WARNING: Implementation here must contain only generic UNIX code that
//=== is guaranteed to work on *all* UNIX variants.
@@ -53,9 +55,10 @@ Process::GetPageSize()
const int page_size = 0x1000;
#elif defined(HAVE_GETPAGESIZE)
const int page_size = ::getpagesize();
-#elif defined(HAVE_SYSCONF)
+#elif defined(HAVE_SYSCONF) && !defined(__native_client__)
long page_size = ::sysconf(_SC_PAGE_SIZE);
#else
+ const int page_size = 0;
#warning Cannot get the page size on this machine
#endif
return static_cast<unsigned>(page_size);
@@ -110,7 +113,7 @@ Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
TimeValue& sys_time) {
elapsed = TimeValue::now();
-#if defined(HAVE_GETRUSAGE)
+#if defined(HAVE_GETRUSAGE) && !defined(__native_client__)
struct rusage usage;
::getrusage(RUSAGE_SELF, &usage);
user_time = TimeValue(
@@ -131,11 +134,23 @@ Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
}
int Process::GetCurrentUserId() {
+#if !defined(__native_client__)
return getuid();
+#else // (__native_client__)
+// TODO(abetul): What should the proper return value be for this function?
+// What about having a reserved user_id or the user "nobody" for PNACL?
+ return -1;
+#endif // (__native_client__)
}
int Process::GetCurrentGroupId() {
+#if !defined(__native_client__)
return getgid();
+#else // (__native_client__)
+// TODO(abetul): What should the proper return value be for this function?
+// What about having a reserved/unused group_id?
+ return -1;
+#endif // (__native_client__)
}
#if defined(HAVE_MACH_MACH_H) && !defined(__GNU__)
@@ -329,3 +344,6 @@ unsigned llvm::sys::Process::GetRandomNumber() {
return ::rand();
#endif
}
+
+#if !defined(__native_client__)
+#endif
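The identity stubs above return -1 rather than inventing a fake uid/gid, which is safe as long as no caller branches on real UNIX identity. A hypothetical caller, for illustration only (RunningAsRoot is not part of the patch):

#include "llvm/Support/Process.h"

static bool RunningAsRoot() {
  // -1 under NaCl, so this is simply false; on a normal host it is
  // the real uid from getuid().
  return llvm::sys::Process::GetCurrentUserId() == 0;
}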
\ No newline at end of file diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index e5990d06ec..049c41b742 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -103,6 +103,10 @@ Program::FindProgramByName(const std::string& progName) { } static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { +#if defined(__native_client__) + MakeErrMsg(ErrMsg, "Cannot redirect I/O in NaCl"); + return true; +#else // (__native_client__) if (Path == 0) // Noop return false; const char *File; @@ -119,7 +123,6 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { + (FD == 0 ? "input" : "output")); return true; } - // Install it as the requested FD if (dup2(InFD, FD) == -1) { MakeErrMsg(ErrMsg, "Cannot dup2"); @@ -128,6 +131,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { } close(InFD); // Close the original FD return false; +#endif // (__native_client__) } #ifdef HAVE_POSIX_SPAWN @@ -233,6 +237,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, } #endif +#if !defined(__native_client__) // Create a child process. int child = fork(); switch (child) { @@ -293,6 +298,10 @@ Program::Execute(const Path &path, const char **args, const char **envp, Data_ = reinterpret_cast<void*>(child); return true; +#else // (__native_client__) + MakeErrMsg(ErrMsg, "PNACL does not know how to execute child processes!"); + return false; +#endif // (__native_client__) } int @@ -300,6 +309,7 @@ Program::Wait(const sys::Path &path, unsigned secondsToWait, std::string* ErrMsg) { +#if !defined(__native_client__) #ifdef HAVE_SYS_WAIT_H struct sigaction Act, Old; @@ -392,10 +402,16 @@ Program::Wait(const sys::Path &path, *ErrMsg = "Program::Wait is not implemented on this platform yet!"; return -1; #endif +#else // (__native_client__) +// TODO(abetul): What should the proper return value be here? + MakeErrMsg(ErrMsg, "PNACL does not know how to wait for a child process!"); + return -1; +#endif // (__native_client__) } bool Program::Kill(std::string* ErrMsg) { +#if !defined(__native_client__) if (Data_ == 0) { MakeErrMsg(ErrMsg, "Process not started!"); return true; @@ -410,6 +426,12 @@ Program::Kill(std::string* ErrMsg) { } return false; + +#else // (__native_client__) + MakeErrMsg(ErrMsg, "PNACL does not know how to kill processes!"); + return true; +#endif // (__native_client__) + } error_code Program::ChangeStdinToBinary(){ diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index c9ec9fce9a..130b11b93d 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -81,6 +81,7 @@ static struct { static void RegisterHandler(int Signal) { +#if !defined(__native_client__) assert(NumRegisteredSignals < sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) && "Out of space for signal handlers!"); @@ -96,6 +97,7 @@ static void RegisterHandler(int Signal) { &RegisteredSignalInfo[NumRegisteredSignals].SA); RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal; ++NumRegisteredSignals; +#endif // (__native_client__) } static void RegisterHandlers() { @@ -107,11 +109,13 @@ static void RegisterHandlers() { } static void UnregisterHandlers() { +#if !defined(__native_client__) // Restore all of the signal handlers to how they were before we showed up. 
for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i) sigaction(RegisteredSignalInfo[i].SigNo, &RegisteredSignalInfo[i].SA, 0); NumRegisteredSignals = 0; +#endif // (__native_client__) } @@ -132,10 +136,12 @@ static RETSIGTYPE SignalHandler(int Sig) { // instead of recursing in the signal handler. UnregisterHandlers(); +#if !defined(__native_client__) // Unmask all potentially blocked kill signals. sigset_t SigMask; sigfillset(&SigMask); sigprocmask(SIG_UNBLOCK, &SigMask, 0); +#endif SignalsMutex.acquire(); RemoveFilesToRemove(); diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc index 5cf5a9d44e..0eb4ac8ad3 100644 --- a/lib/Support/Unix/TimeValue.inc +++ b/lib/Support/Unix/TimeValue.inc @@ -18,6 +18,13 @@ #include "Unix.h" +// @LOCALMOD-START +#ifndef timerclear +// Newlib does not have the timer{clear,add,sub} macros +#define timerclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) +#endif +// @LOCALMOD-END + namespace llvm { using namespace sys; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 2a1e8e4d30..9a8cab8ecc 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -20,6 +20,9 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" +// @LOCALMOD (for LowerARMMachineInstrToMCInstPCRel) +#include "llvm/MC/MCSymbol.h" + namespace llvm { class ARMAsmPrinter; @@ -43,9 +46,27 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +/* @LOCALMOD-START */ +FunctionPass *createARMNaClRewritePass(); +/* @LOCALMOD-END */ + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); + +/* @LOCALMOD-START */ +// Used to lower the pc-relative MOVi16PIC / MOVTi16PIC pseudo instructions +// into the real MOVi16 / MOVTi16 instructions. +// See comment on MOVi16PIC for more details. +void LowerARMMachineInstrToMCInstPCRel(const MachineInstr *MI, + MCInst &OutMI, + ARMAsmPrinter &AP, + unsigned ImmIndex, + unsigned PCIndex, + MCSymbol *PCLabel, + unsigned PCAdjustment); +/* @LOCALMOD-END */ + } // end namespace llvm; #endif diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 9b0cb0c9e5..10d7f56c7f 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -205,8 +205,12 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, // V7a Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; +// @LOCALMOD-BEGIN +// TODO(pdox): Resolve this mismatch. + [ProcA8, HasV7Ops, FeatureDB]>; +// FeatureNEON, FeatureDSPThumb2, FeatureHasRAS]>; +// @LOCALMOD-END + def : Processor<"cortex-a9", CortexA9Itineraries, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS]>; diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 5bff9fb756..967c0a8462 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -51,6 +51,13 @@ #include <cctype> using namespace llvm; +// @LOCALMOD-START +namespace llvm { + extern cl::opt<bool> FlagSfiBranch; + extern cl::opt<bool> FlagSfiData; +} +// @LOCALMOD-END + namespace { // Per section and per symbol attributes are not supported. 
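The FlagSfiBranch / FlagSfiData externs above follow a pattern used throughout these LOCALMODs: each SFI knob is defined as a cl::opt in a single translation unit (presumably the NaCl rewrite pass, which this diff does not show) and re-declared extern wherever the backend consults it. A sketch of both halves; the option string and description here are assumptions, not taken from the patch:

// Defining TU (assumed):
namespace llvm {
cl::opt<bool> FlagSfiBranch(
    "sfi-branch", cl::desc("Align branch targets for NaCl SFI"),
    cl::init(true));
}

// Any consumer, such as ARMAsmPrinter.cpp above:
namespace llvm {
  extern cl::opt<bool> FlagSfiBranch;
}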
@@ -222,6 +229,75 @@ getDebugValueLocation(const MachineInstr *MI) const { return Location; } +// @LOCALMOD-START +// Make sure all jump targets are aligned and also all constant pools +void NaclAlignAllJumpTargetsAndConstantPools(MachineFunction &MF) { + // JUMP TABLE TARGETS + MachineJumpTableInfo *jt_info = MF.getJumpTableInfo(); + if (jt_info) { + const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables(); + for (unsigned i=0; i < JT.size(); ++i) { + std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs; + + for (unsigned j=0; j < MBBs.size(); ++j) { + if (MBBs[j]->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + continue; + } + MBBs[j]->setAlignment(4); + } + } + } + + // FIRST ENTRY IN A ConstanPool + bool last_bb_was_constant_pool = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + if (I->isLandingPad()) { + I->setAlignment(4); + } + + if (I->empty()) continue; + + bool is_constant_pool = I->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY; + + if (last_bb_was_constant_pool != is_constant_pool) { + I->setAlignment(4); + } + + last_bb_was_constant_pool = is_constant_pool; + } +} + +bool ARMAsmPrinter::UseReadOnlyJumpTables() const { + if (Subtarget->isTargetNaCl()) + return true; + return false; +} + +unsigned ARMAsmPrinter::GetTargetBasicBlockAlign() const { + if (Subtarget->isTargetNaCl()) + return 4; + return 0; +} + +unsigned ARMAsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 4; + } + } + return 0; +} +// @LOCALMOD-END + /// EmitDwarfRegOp - Emit dwarf register operation. void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); @@ -298,6 +374,17 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitThumbFunc(CurrentFnSym); } + // @LOCALMOD-START + // make sure function entry is aligned. We use XmagicX as our basis + // for alignment decisions (c.f. 
assembler sfi macros) + int alignment = MF->getAlignment(); + if (alignment < 4) alignment = 4; + EmitAlignment(alignment); + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(StringRef("\t.set XmagicX, .\n")); + } + // @LOCALMOD-END + OutStreamer.EmitLabel(CurrentFnSym); } @@ -324,6 +411,11 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); MCP = MF.getConstantPool(); + // @LOCALMOD-START + if (FlagSfiBranch) { + NaclAlignAllJumpTargetsAndConstantPools(MF); + } + // @LOCALMOD-END return AsmPrinter::runOnMachineFunction(MF); } @@ -359,10 +451,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || - (TF & ARMII::MO_LO16)) + (TF == ARMII::MO_LO16)) // @LOCALMOD: TEMPORARY FIX O << ":lower16:"; else if ((Modifier && strcmp(Modifier, "hi16") == 0) || - (TF & ARMII::MO_HI16)) + (TF == ARMII::MO_HI16)) // @LOCALMOD: TEMPORARY FIX O << ":upper16:"; O << *Mang->getSymbol(GV); @@ -388,6 +480,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, //===--------------------------------------------------------------------===// + MCSymbol *ARMAsmPrinter:: GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, const MachineBasicBlock *MBB) const { @@ -563,6 +656,8 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +void EmitSFIHeaders(raw_ostream &O); + void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { Reloc::Model RelocM = TM.getRelocationModel(); @@ -607,8 +702,16 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Emit ARM Build Attributes if (Subtarget->isTargetELF()) emitAttributes(); -} + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + std::string str; + raw_string_ostream OS(str); + EmitSFIHeaders(OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + // @LOCALMOD-END +} void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { @@ -678,6 +781,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { } } + //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -928,7 +1032,20 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext); } Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext); + } else { // @LOCALMOD-BEGIN + // Check mustAddCurrentAddress() when getPCAdjustment() == 0, + // and make it actually *Subtract* the current address. + // A more appropriate name is probably "relativeToCurrentAddress", + // since the assembler can't actually handle "X + .", only "X - .". 
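// Sketch of what the LOCALMOD below emits (label name assumed):
//   Ltmp0:                   @ fresh temp symbol bound to "."
//   .long  X - Ltmp0         @ assembler-computable difference
// i.e. the unencodable "X + ." becomes a subtraction of the current
// address, exactly as the comment above describes.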
+ if (ACPV->mustAddCurrentAddress()) { + MCSymbol *DotSym = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(DotSym); + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + Expr = MCBinaryExpr::CreateSub(Expr, DotExpr, OutContext); + } } + // @LOCALMOD-END + OutStreamer.EmitValue(Expr, Size); } @@ -1587,6 +1704,28 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { InConstantPool = true; } + + // @LOCALMOD-START + // NOTE: we also should make sure that the first data item + // is not in a code bundle + // NOTE: there may be issues with alignment constraints + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + const unsigned size = MI->getOperand(2).getImm(); + //assert(size == 4 || size == 8 && "Unsupported data item size"); + if (size == 8) { + // we cannot generate a size 8 constant at offset 12 (mod 16) + OutStreamer.EmitRawText(StringRef("sfi_nop_if_at_bundle_end\n")); + } + + if (FlagSfiData) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "sfi_illegal_if_at_bundle_begining @ ========== SFI (" << + size << ")\n"; + OutStreamer.EmitRawText(OS.str()); + } + } + // @LOCALMOD-END OutStreamer.EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; @@ -2015,6 +2154,50 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } + + // @LOCALMOD-BEGIN + // These are pseudo ops for MOVW / MOVT with operands relative to a PC label. + // See the comments on MOVi16PIC in the .td file for more details. + case ARM::MOVi16PIC: { + MCInst TmpInst; + // First, build an instruction w/ the real opcode. + TmpInst.setOpcode(ARM::MOVi16); + + unsigned ImmIndex = 1; + unsigned PIC_id_index = 2; + unsigned PCAdjustment = 8; + // NOTE: if getPICLabel was a method of "this", or otherwise in scope for + // LowerARMMachineInstrToMCInstPCRel, then we wouldn't need to create + // it here (as well as below). + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(PIC_id_index).getImm(), + OutContext); + LowerARMMachineInstrToMCInstPCRel(MI, TmpInst, *this, ImmIndex, + PIC_id_index, PCLabel, PCAdjustment); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVTi16PIC: { + MCInst TmpInst; + // First, build an instruction w/ the real opcode. + TmpInst.setOpcode(ARM::MOVTi16); + + unsigned ImmIndex = 2; + unsigned PIC_id_index = 3; + unsigned PCAdjustment = 8; + + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(PIC_id_index).getImm(), + OutContext); + + LowerARMMachineInstrToMCInstPCRel(MI, TmpInst, *this, ImmIndex, + PIC_id_index, PCLabel, PCAdjustment); + OutStreamer.EmitInstruction(TmpInst); + return; + } + //@LOCALMOD-END } MCInst TmpInst; diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 3555e8f50a..273f85026d 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -72,8 +72,18 @@ public: virtual void EmitInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - virtual void EmitConstantPool() {} // we emit constant pools customly! virtual void EmitFunctionBodyEnd(); + + // @LOCALMOD-START + // usually this does nothing on ARM as constants pools + // are handled with custom code. + // For the sfi case we do not use the custom logic and fall back + // to the default implementation. 
+ virtual void EmitConstantPool() { + if (FlagSfiDisableCP) AsmPrinter::EmitConstantPool(); + } + // @LOCALMOD-END + virtual void EmitFunctionEntryLabel(); void EmitStartOfAsmFile(Module &M); void EmitEndOfAsmFile(Module &M); @@ -81,6 +91,17 @@ public: // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + + // @LOCALMOD-START + /// UseReadOnlyJumpTables - true if JumpTableInfo must be in rodata. + virtual bool UseReadOnlyJumpTables() const; + /// GetTargetBasicBlockAlign - Get the target alignment for basic blocks. + virtual unsigned GetTargetBasicBlockAlign() const; + /// GetTargetLabelAlign - Get optional alignment for TargetOpcode + /// labels E.g., EH_LABEL. + /// TODO(sehr,robertm): remove this if the labeled block has address taken. + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const; + // @LOCALMOD-END private: // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6bed1371fe..ee942629ec 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1589,6 +1589,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, // Build the new ADD / SUB. unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill).addImm(ThisVal) .addImm((unsigned)Pred).addReg(PredReg).addReg(0) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 231bd26c54..533ed8834b 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -17,6 +17,7 @@ #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" +#include "ARMTargetMachine.h" // @LOCALMOD #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" @@ -60,14 +61,17 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, BasePtr(ARM::R6) { } +extern cl::opt<bool> ReserveR9; // @LOCALMOD const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (ReserveR9) return CSR_NaCl_SaveList; // @LOCALMOD return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; } const uint32_t* ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { + if (ReserveR9) return CSR_NaCl_RegMask; // @LOCALMOD return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } @@ -696,6 +700,13 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + // @LOCALMOD-START + // In the sfi case we do not want to use the load const pseudo instr. + // Sadly, the ARM backend is not very consistent about using this + // pseudo instr. and hence checking this is not sufficient. + // But, it should help detect some regressions early. 
+ assert(!FlagSfiDisableCP && "unexpected call to emitLoadConstPool"); + // @LOCALMOD-END MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index b9a25126ba..1a79d95f95 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -86,6 +86,10 @@ def RetFastCC_ARM_APCS : CallingConv<[ def CC_ARM_AAPCS_Common : CallingConv<[ + // @LOCALMOD-BEGIN (PR11018) + CCIfByVal<CCPassByVal<4, 4>>, + // @LOCALMOD-END + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, // i64/f64 is passed in even pairs of GPRs @@ -171,3 +175,9 @@ def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, // iOS ABI deviates from ARM standard ABI. R9 is not a callee-saved register. // Also save R7-R4 first to match the stack frame fixed spill areas. def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; + +// @LOCALMOD-START +// NaCl does not save R9, but otherwise uses the same order as AAPCS +def CSR_NaCl : CalleeSavedRegs<(add LR, R11, R10, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8))>; +// @LOCALMOD-END
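CSR_NaCl mirrors the AAPCS order minus R9, which NaCl reserves as the thread pointer. TableGen turns this definition into CSR_NaCl_SaveList and CSR_NaCl_RegMask, which are what the ReserveR9 hooks earlier in this diff hand back; condensed from the ARMBaseRegisterInfo.cpp hunk above:

const uint16_t *ARMBaseRegisterInfo::getCalleeSavedRegs(
    const MachineFunction *MF) const {
  if (ReserveR9)               // NaCl: R9 is the thread pointer,
    return CSR_NaCl_SaveList;  // so it is never saved or allocated
  return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_SaveList
                                                   : CSR_AAPCS_SaveList;
}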
\ No newline at end of file diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 10e9da42a9..bf9baf7c19 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -374,6 +374,7 @@ FunctionPass *llvm::createARMConstantIslandPass() { } bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { + if (FlagSfiDisableCP) return false; // @LOCALMOD MF = &mf; MCP = mf.getConstantPool(); diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 6b98d446b0..f523097d2c 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -81,6 +81,9 @@ public: bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; } bool isLSDA() const { return Kind == ARMCP::CPLSDA; } bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } + // @LOCALMOD-START + bool isValue() const { return Kind == ARMCP::CPValue; } + // @LOCALMOD-END virtual unsigned getRelocationInfo() const { return 2; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index ac9163f13d..c386a01e89 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetOptions.h" // @LOCALMOD for llvm::TLSUseCall #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! @@ -43,6 +44,7 @@ namespace { const TargetRegisterInfo *TRI; const ARMSubtarget *STI; ARMFunctionInfo *AFI; + bool IsRelocPIC; // @LOCALMOD virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -63,6 +65,16 @@ namespace { unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); + // @LOCALMOD-BEGIN + void AddPICADD_MOVi16_PICID(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool NotThumb, + unsigned PredReg, ARMCC::CondCodes Pred, + unsigned DstReg, bool DstIsDead, + MachineInstrBuilder &LO16, + MachineInstrBuilder &HI16); + // @LOCALMOD-END }; char ARMExpandPseudo::ID = 0; } @@ -477,13 +489,46 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { if (SrcIsKill) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); - // Transfer memoperands. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MI.eraseFromParent(); } +// @LOCALMOD-BEGIN +// AddPICADD_MOVi16_PICID - Inserts a PICADD into the given basic block, +// and adds the PC label ID (of the PICADD) as an operand of the LO16 / HI16 +// MOVs. The ID operand will follow the "Immediate" operand (assumes that +// operand is already added). 
+void ARMExpandPseudo::AddPICADD_MOVi16_PICID(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool NotThumb, + unsigned PredReg, ARMCC::CondCodes Pred, + unsigned DstReg, bool DstIsDead, + MachineInstrBuilder &LO16, + MachineInstrBuilder &HI16) { + // Throw in a PICADD, and tack on the PC label ID to the MOVT/MOVWs + MachineFunction &MF = *MI.getParent()->getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Make a unique ID for this PC by pulling from pool of constPoolIDs + unsigned PC_ID = AFI->createPICLabelUId(); + MachineInstrBuilder PicADD = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NotThumb ? ARM::PICADD : ARM::tPICADD)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(PC_ID) + .addImm(Pred) + .addReg(PredReg); + (void)PicADD; // squelch unused warning. + + // Add the PC label ID after what would have been an absolute address. + LO16 = LO16.addImm(PC_ID); + HI16 = HI16.addImm(PC_ID); +} +// @LOCALMOD-END + /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { @@ -644,7 +689,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned LO16Opc = 0; unsigned HI16Opc = 0; - if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + // @LOCALMOD + bool isThumb2 = (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm); + if (isThumb2) { LO16Opc = ARM::t2MOVi16; HI16Opc = ARM::t2MOVTi16; } else { @@ -652,10 +699,28 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16Opc = ARM::MOVTi16; } + // @LOCALMOD-BEGIN + // If constant pools are "disabled" (actually, moved to rodata), then + // many addresses (e.g., the addresses of what used to be the "pools") + // may not be materialized in a pc-relative manner, because MOVT / MOVW + // are used to materialize the addresses. + // We need to know if it matters that references are pc-relative + // (e.g., to be PIC). + // See the comments on MOVi16PIC / MOVTi16PIC for more details. + const bool ShouldUseMOV16PIC = FlagSfiDisableCP && IsRelocPIC && + (MO.isCPI() || MO.isJTI() || MO.isGlobal()); // TODO check this list. 
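// When ShouldUseMOV16PIC fires, the branch below swaps in the PIC
// pseudo opcodes, and AddPICADD_MOVi16_PICID appends the PICADD. The
// intended result is roughly (relocation syntax assumed for sketch):
//   movw r0, #:lower16:(sym - (.LPC0_1 + 8))
//   movt r0, #:upper16:(sym - (.LPC0_1 + 8))
// .LPC0_1:
//   add  r0, pc, r0       @ r0 = &sym, no constant-pool load needed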
+ if (ShouldUseMOV16PIC) { + if (isThumb2) + llvm_unreachable("FIXME: add PIC versions of t2MOVi16"); + LO16Opc = ARM::MOVi16PIC; + HI16Opc = ARM::MOVTi16PIC; + } + // @LOCALMOD-END + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); + .addReg(DstReg, RegState::Kill); // @LOCALMOD if (MO.isImm()) { unsigned Imm = MO.getImm(); @@ -663,13 +728,31 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); - } else { + } else if (MO.isGlobal()) { // @LOCALMOD const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + // @LOCALMOD-START - support for jumptable addresses and CPI + } else if (MO.isCPI()) { + int i = MO.getIndex(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addConstantPoolIndex(i, MO.getOffset(), TF|ARMII::MO_LO16); + HI16 = HI16.addConstantPoolIndex(i, MO.getOffset(), TF|ARMII::MO_HI16); + } else if (MO.isJTI()){ + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addJumpTableIndex(MO.getIndex(), TF | ARMII::MO_LO16); + HI16 = HI16.addJumpTableIndex(MO.getIndex(), TF | ARMII::MO_HI16); + } else { + assert (0 && "unexpected operand"); + // @LOCALMOD-END } - + // @LOCALMOD-BEGIN + if (ShouldUseMOV16PIC) { + AddPICADD_MOVi16_PICID(MI, MBB, MBBI, !isThumb2, + PredReg, Pred, DstReg, DstIsDead, LO16, HI16); + } + // @LOCALMOD-END LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); @@ -849,13 +932,27 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::tTPsoft: case ARM::TPsoft: { + // @LOCALMOD-BEGIN + if (llvm::TLSUseCall) { + // Don't add implicit uses/defs for this call, otherwise + // liveness analysis passes get confused. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), + BuildMI_NoImp(MBB, MBBI, MI.getDebugLoc(), // @LOCALMOD TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - TransferImpOps(MI, MIB, MIB); + TransferImpOps(MI, MIB, MIB); + } else { + // Inline version for native client. + // See native_client/src/untrusted/stubs/aeabi_read_tp.S + // mov r0, r9 + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr), + ARM::R0) + .addReg(ARM::R9)) + .addReg(0); // Doesn't use/modify CPSR. + } + // @LOCALMOD-END MI.eraseFromParent(); return true; } @@ -1211,6 +1308,62 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; + + // @LOCALMOD-BEGIN + case ARM::ARMeh_return: { + // This pseudo instruction is generated as part of the lowering of + // ISD::EH_RETURN (c.f. 
ARMISelLowering.cpp) + // we convert it to a stack increment by OffsetReg and + // indirect jump to TargetReg + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned OffsetReg = MI.getOperand(0).getReg(); + unsigned TargetReg = MI.getOperand(1).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ADDrr), ARM::SP) + .addReg(OffsetReg) + .addReg(ARM::SP) + .addImm(Pred) + .addReg(PredReg) + .addReg(0); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BX)) + .addReg(TargetReg); + MI.eraseFromParent(); + break; + } + case ARM::MOVGOTAddr : { + // Expand the pseudo-inst that requests for the GOT address + // to be materialized into a register. We use MOVW/MOVT for this. + // See ARMISelLowering.cpp for a comment on the strategy. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVi16PIC), + DstReg) + .addExternalSymbol("_GLOBAL_OFFSET_TABLE_", ARMII::MO_LO16); + + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVTi16PIC)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addExternalSymbol("_GLOBAL_OFFSET_TABLE_", ARMII::MO_HI16); + + AddPICADD_MOVi16_PICID(MI, MBB, MBBI, true, + PredReg, Pred, DstReg, DstIsDead, LO16, HI16); + + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + break; + } + // @LOCALMOD-END } } @@ -1233,6 +1386,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TRI = TM.getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); AFI = MF.getInfo<ARMFunctionInfo>(); + IsRelocPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a9b482ae2d..2158b7e028 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -693,6 +693,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { } unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { + // @LOCALMOD-START + // In the sfi case we do not want to use the ARM custom cp handling. + // This assert should help detect some regressions early. + assert(!FlagSfiDisableCP && "unexpected call to TargetMaterializeConstant"); + // @LOCALMOD-END EVT VT = TLI.getValueType(C->getType(), true); // Only handle simple types. 
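FlagSfiDisableCP thus works as a kill switch threaded through several passes: constant islands are skipped outright, and code paths that could still reach the in-text pools are fenced with asserts, as in the two hunks above. The pass-level guard is a plain early no-change return; condensed from the ARMConstantIslandPass.cpp hunk earlier in this diff:

bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
  if (FlagSfiDisableCP)  // pools moved to rodata; no islands to place
    return false;        // report "no modification" and skip the pass
  // ... normal constant-island placement ...
}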
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 2629496cc5..72ec16bbe2 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -24,6 +24,9 @@ #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" +// @LOCALMOD-START +#include "llvm/CodeGen/MachineModuleInfo.h" +// @LOCALMOD-END using namespace llvm; @@ -151,6 +154,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // @LOCALMOD-START + MachineModuleInfo &MMI = MF.getMMI(); + // This condition was gleaned from x86 / PowerPC / XCore + bool needsFrameMoves = MMI.hasDebugInfo() || + !MF.getFunction()->doesNotThrow() || + MF.getFunction()->needsUnwindTableEntry(); + // @LOCALMOD-END + // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, @@ -205,6 +216,42 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Move past area 1. if (GPRCS1Size > 0) MBBI++; + // @LOCALMOD-START + if (needsFrameMoves && GPRCS1Size > 0) { + // we just skipped the initial callee save reg instructions, e.g. + // push {r4, r5, r6, lr} + // NOTE: this likely is not the right thing to do for darwin as it does not + // treat all callee save regs uniformly + MCSymbol *AfterRegSave = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(ARM::PROLOG_LABEL)).addSym(AfterRegSave); + // record the fact that the stack has moved + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(MachineLocation::VirtualFP, -GPRCS1Size); + MMI.getFrameMoves().push_back(MachineMove(AfterRegSave, dst, src)); + // for each callee saved register record where it has been saved + int offset = 0; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::LR: + offset -= 4; + MachineLocation dst(MachineLocation::VirtualFP, offset); + MachineLocation src(Reg); + MMI.getFrameMoves().push_back(MachineMove(AfterRegSave, dst, src)); + break; + } + } + } + // @LOCALMOD-END + // Set FP to point to the stack slot that contains the previous FP. // For iOS, FP is R7, which has now been stored in spill area 1. // Otherwise, if this is not iOS, all the callee-saved registers go @@ -218,8 +265,29 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); + // @LOCALMOD-START + if (needsFrameMoves) { + // we just emitted the fp pointer setup instruction, e.g. + // add r11, sp, #8 + MCSymbol *AfterFramePointerInit = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(ARM::PROLOG_LABEL)).addSym(AfterFramePointerInit); + // record the fact that the frame pointer is now tracking the "cfa" + // Note, gcc and llvm have a slightly different notion of where the + // frame pointer should be pointing. gcc points after the return address + // and llvm one word further down (two words = 8). + // This should be fine as long as we are consistent. 
+ // NOTE: this is related to the offset computed for + // ISD::FRAME_TO_ARGS_OFFSET + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(FramePtr, 8); + MMI.getFrameMoves().push_back(MachineMove(AfterFramePointerInit, dst, src)); + } + // @LOCALMOD-END } + + // Move past area 2. if (GPRCS2Size > 0) MBBI++; @@ -268,6 +336,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // an inconsistent state (pointing to the middle of callee-saved area). // The interrupt handler can end up clobbering the registers. AFI->setShouldRestoreSPFromFP(true); + + // @LOCALMOD-START + // we only track sp changes if do not have the fp to figure out where + // stack frame lives + if (needsFrameMoves && !HasFP) { + MCSymbol *AfterStackUpdate = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(ARM::PROLOG_LABEL)).addSym(AfterStackUpdate); + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(MachineLocation::VirtualFP, - NumBytes - GPRCS1Size); + MMI.getFrameMoves().push_back(MachineMove(AfterStackUpdate, dst, src)); + } + // @LOCALMOD-END } if (STI.isTargetELF() && hasFP(MF)) @@ -658,7 +739,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps() && + false /* @LOCALMOD */) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index a1c2b93562..a2280db515 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -27,7 +27,8 @@ protected: public: explicit ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4, + 4), // @LOCALMOD STI(sti) { } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1953192a0c..36dbc3bc4e 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -35,8 +35,17 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +// @LOCALMOD-START +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; +} +// @LOCALMOD-END + using namespace llvm; + static cl::opt<bool> DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), @@ -114,21 +123,24 @@ public: bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, + AddrMode2Type SelectAddrMode2Worker(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2Base(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; + return SelectAddrMode2Worker(Op, N, Base, Offset, Opc) == AM2_BASE; } - bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2ShOp(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == 
AM2_SHOP; + return SelectAddrMode2Worker(Op, N, Base, Offset, Opc) == AM2_SHOP; } - bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - SelectAddrMode2Worker(N, Base, Offset, Opc); + SelectAddrMode2Worker(Op, N, Base, Offset, Opc); // return SelectAddrMode2ShOp(N, Base, Offset, Opc); // This always matches one way or another. return true; @@ -141,7 +153,7 @@ public: bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrOffsetNone(SDValue N, SDValue &Base); - bool SelectAddrMode3(SDValue N, SDValue &Base, + bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); @@ -438,6 +450,22 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, return true; } +// @LOCALMOD-START +static bool ShouldOperandBeUnwrappedForUseAsBaseAddress( + SDValue& N, const ARMSubtarget* Subtarget) { + assert (N.getOpcode() == ARMISD::Wrapper); + // Never use this transformation if constant island pools are disallowed + if (FlagSfiDisableCP) return false; + + // always apply this when we do not have movt/movw available + // (if we do have movt/movw we be able to get rid of the + // constant pool entry altogether) + if (!Subtarget->useMovt()) return true; + // explain why we do not want to use this for TargetGlobalAddress + if (N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) return true; + return false; +} +// @LOCALMOD-END bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, @@ -456,8 +484,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } else Base = N; @@ -491,6 +519,11 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { + // @LOCALMOD-BEGIN + // Disallow offsets of Reg + Reg (which may escape sandbox). + if (Subtarget->isTargetNaCl()) + return false; + // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && (!Subtarget->isCortexA9() || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -585,10 +618,24 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, //----- -AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, +AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, +// @LOCALMOD-START +// Note: In the code below we do not want "Offset" to be real register to +// not violate ARM sandboxing. +// @LOCALMOD-END SDValue &Opc) { + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + // This is neither a sandboxable load nor a sandboxable store. 
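// Why the restriction below exists (background, not from this diff):
// the NaCl ARM sandbox masks the base register right before each
// access, e.g.
//   bic r0, r0, #0xC0000000
//   ldr r1, [r0, #12]
// so only a [base + #imm] form keeps the masked register as the sole
// address component; a [base + reg] or shifted-index mode would let an
// unmasked index escape the sandboxed range.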
+ if (!restrict_addressing_modes_for_nacl) { + // @LOCALMOD-END + if (N.getOpcode() == ISD::MUL && (!Subtarget->isCortexA9() || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -612,6 +659,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, } } } + } // @LOCALMOD if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && // ISD::OR that is equivalent to an ADD. @@ -621,8 +669,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -655,7 +703,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, return AM2_BASE; } } - + if (Subtarget->isCortexA9() && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; @@ -665,6 +713,24 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, MVT::i32); return AM2_BASE; } + + // @LOCALMOD-START + // Keep load and store addressing modes simple + if (restrict_addressing_modes_for_nacl) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } + // @LOCALMOD-END // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; @@ -732,13 +798,27 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) return false; + // @LOCALMOD-BEGIN + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + // @LOCALMOD-END + + Offset = N; ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); unsigned ShAmt = 0; if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't fold // it. - if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + + //if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1)); + // @LOCALMOD-BEGIN + // Neither a sandboxable load nor a sandboxable store. 
+ if (!restrict_addressing_modes_for_nacl && Sh ) { + // @LOCALMOD-END ShAmt = Sh->getZExtValue(); if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) Offset = N.getOperand(0); @@ -801,16 +881,25 @@ bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { return true; } -bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + if (!restrict_addressing_modes_for_nacl) { + // @LOCALMOD-END if (N.getOpcode() == ISD::SUB) { + // X - C is canonicalize to X + -C, no need to handle it here. Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); return true; } + } // @LOCALMOD-END if (!CurDAG->isBaseWithConstantOffset(N)) { Base = N; @@ -843,6 +932,16 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, return true; } + // @LOCALMOD-START + // A sandboxable load or a sandboxable store. + if (restrict_addressing_modes_for_nacl) { + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + // @LOCALMOD-END + Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); @@ -877,8 +976,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -2588,6 +2687,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. 
} + if (FlagSfiDisableCP) UseCP = false; // @LOCALMOD + if (UseCP) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c48fa763b4..89bed8b823 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -48,6 +48,15 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" + +// @LOCALMOD-START +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -71,6 +80,7 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::init(true)); namespace { + class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, @@ -259,8 +269,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SHL_I128, 0); setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { + // @LOCALMOD: use standard names and calling conventions for pnacl + if (!Subtarget->isTargetNaCl() && Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -650,9 +660,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - + // @LOCALMOD-START + if (!Subtarget->useInlineJumpTables()) + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + // @LOCALMOD-END + setOperationAction(ISD::TRAP, MVT::Other, Legal); + // Use the default implementation. setOperationAction(ISD::VASTART, MVT::Other, Custom); setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -665,9 +680,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Non-Darwin platforms may return values in these registers via the // personality function. setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + // @LOCALMOD-START setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + // we use the first caller saved regs here + // c.f.: llvm-gcc/llvm-gcc-4.2/gcc/unwind-dw2.c::uw_install_context + // NOTE: these are related to the _Unwind_PNaClSetResult{0,1} functions + setExceptionPointerRegister(ARM::R4); + setExceptionSelectorRegister(ARM::R5); + + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + // @LOCALMOD-END } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); @@ -753,8 +777,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Custom); - + // @LOCALMOD-START + //setOperationAction(ISD::BR_JT, MVT::Other, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, + Subtarget->useInlineJumpTables() ? 
Custom : Expand); + // @LOCALMOD-END + // We don't support sin/cos/fmod/copysign/pow setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); @@ -791,6 +819,16 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_THREAD_STACK_PADDING, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_ALIGN, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TARGET_ARCH, MVT::i32, Custom); + } + // @LOCALMOD-END + // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -885,6 +923,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + // @LOCALMOD-START + case ARMISD::WrapperJT2: return "ARMISD::WrapperJT2"; + case ARMISD::EH_RETURN: return "ARMISD::EH_RETURN"; + // @LOCALMOD-END case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; @@ -1660,6 +1702,15 @@ ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { assert((State->getCallOrPrologue() == Prologue || State->getCallOrPrologue() == Call) && "unhandled ParmContext"); + + // @LOCALMOD-BEGIN + // This mechanism tries to split a byval argument between registers + // and the stack. It doesn't work correctly yet, so disable it. + // This leaves the entire byval argument on the stack. + // PR11018. + return; + // @LOCALMOD-END + if ((!State->isFirstByValRegValid()) && (ARM::R0 <= reg) && (reg <= ARM::R3)) { State->setFirstByValReg(reg); @@ -2044,7 +2095,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { } unsigned ARMTargetLowering::getJumpTableEncoding() const { - return MachineJumpTableInfo::EK_Inline; + // @LOCALMOD-BEGIN + if (Subtarget->useInlineJumpTables()) { + return MachineJumpTableInfo::EK_Inline; + } else { + // TODO: Find a better way to call the super-class. + return TargetLowering::getJumpTableEncoding(); + } + // @LOCALMOD-END } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, @@ -2077,28 +2135,137 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } +// @LOCALMOD-START +// more conventional jumptable implementation +SDValue ARMTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + assert(!Subtarget->useInlineJumpTables() && + "inline jump tables not custom lowered"); + const DebugLoc dl = Op.getDebugLoc(); + EVT PTy = getPointerTy(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + return DAG.getNode(ARMISD::WrapperJT2, dl, MVT::i32, JTI); +} + +////////////////////////////////////////////////////////////////////// +// NaCl TLS setup / layout intrinsics. 
+// See: native_client/src/untrusted/stubs/tls_params.h +SDValue ARMTargetLowering::LowerNaClTpAlign(SDValue Op, + SelectionDAG &DAG) const { + // size_t __nacl_tp_alignment () { + // return 4; + // } + return DAG.getConstant(4, Op.getValueType().getSimpleVT()); +} + +SDValue ARMTargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tls_offset (size_t tls_size) { + // return 8; + // } + return DAG.getConstant(8, Op.getValueType().getSimpleVT()); +} + +SDValue ARMTargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tdb_offset (size_t tdb_size) { + // return -tdb_size; + // } + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + +SDValue +ARMTargetLowering::LowerNaClThreadStackPadding(SDValue Op, + SelectionDAG &DAG) const { + // size_t __nacl_thread_stack_padding () { + // return 0; + // } + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue +ARMTargetLowering::LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const { + // size_t __nacl_target_arch () { + // return PnaclTargetArchitectureARM_32; + // } + return DAG.getConstant(PnaclTargetArchitectureARM_32, + Op.getValueType().getSimpleVT()); +} + +////////////////////////////////////////////////////////////////////// + +// @LOCALMOD-END + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); - unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); - SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); - Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue Chain = Argument.getValue(1); + // @LOCALMOD-BEGIN + SDValue Chain; + SDValue Argument; + + if (FlagSfiDisableCP) { + // With constant pools "disabled" (moved to rodata), this constant pool + // entry is no longer in text, and simultaneous PC relativeness + // and CP Addr relativeness is no longer expressible. + // So, instead of having: + // + // .LCPI12_0: + // .long var(tlsgd)-((.LPC12_0+8) - .) + // ... + // ldr r2, .LCPI12_0 + // .LPC12_0: + // add r0, pc, r2 + // + // we have: + // + // .LCPI12_0: + // .long var(tlsgd) + // ... + // // get addr of .LCPI12_0 into r2 + // ldr r0, [r2] + // add r0, r2, r0 + // (1) No longer subtracting pc, so no longer adding that back + // (2) Not adding "." in the CP entry, so adding it via instructions. 
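In C terms, the rewritten sequence described above loads the bare addend and then adds back the address of the constant-pool entry itself. A minimal illustrative sketch, not code from this patch (the helper name and the casts are invented):

    #include <stdint.h>
    /* Old scheme: the entry folds in the pc bias, so arg = *entry + pc (PIC_ADD). */
    /* New scheme: the entry holds plain var(tlsgd), so add the entry address:     */
    uint32_t tlsgd_argument(const uint32_t *entry /* address of .LCPI12_0 */) {
      return *entry + (uint32_t)(uintptr_t)entry;  /* ldr r0,[r2]; add r0,r2,r0 */
    }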
+ // + unsigned char PCAdj = 0; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, + false); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Argument.getValue(1); + Argument = DAG.getNode(ISD::ADD, dl, PtrVT, Argument, CPAddr); + } else { // sort of @LOCALMOD-END + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); + Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); // @ LOCALMOD + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Argument.getValue(1); // @LOCALMOD - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + } // @LOCALMOD-END // call __tls_get_addr. ArgListTy Args; @@ -2135,25 +2302,49 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - // Initial exec model. - unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = + + // @LOCALMOD-BEGIN + if (FlagSfiDisableCP) { + // Similar to change to LowerToTLSGeneralDynamicModel, and + // for the same reason. + unsigned char PCAdj = 0; + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, + false); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Offset = DAG.getLoad(PtrVT, dl, Chain, CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Offset.getValue(1); + + Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Offset, CPAddr); + + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + } else { // sort of @LOCALMOD-END (indentation) + // Initial exec model. + unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); - Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); - Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - Chain = Offset.getValue(1); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Offset.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + } // @LOCALMOD-END } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2305,17 +2496,55 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + + // @LOCALMOD-BEGIN + if (FlagSfiDisableCP) { + // With constant pools "disabled" (moved to rodata), the constant pool + // entry is no longer in text, and the PC relativeness is + // no longer expressible. + // + // Instead of having: + // + // .LCPI12_0: + // .long _GLOBAL_OFFSET_TABLE_-(.LPC12_0+8) + // ... + // ldr r2, .LCPI12_0 + // .LPC12_0: + // add r0, pc, r2 + // + // Things to try: + // (1) get the address of the GOT through a pc-relative MOVW / MOVT. + // + // movw r0, :lower16:_GLOBAL_OFFSET_TABLE_ - (.LPC12_0 + 8) + // movt r0, :upper16:_GLOBAL_OFFSET_TABLE_ - (.LPC12_0 + 8) + // .LPC12_0: + // add r0, pc, r0 + // + // (2) Make the constant pool entry relative to its own location + // + // .LCPI12_0: + // .long _GLOBAL_OFFSET_TABLE_-. + // ... + // // get address of LCPI12_0 into r0 (possibly 3 instructions for PIC) + // ldr r1, [r0] + // add r1, r0, r1 + // + // We will try (1) for now, since (2) takes about 3 more instructions + // (and one of them is a load). + return DAG.getNode(ARMISD::WrapperGOT, dl, MVT::i32); + } else { // sort of @LOCALMOD-END (indentation only) + unsigned PCAdj = Subtarget->isThumb() ?
4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } // @LOCALMOD-END } SDValue @@ -2707,10 +2936,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Since they could be overwritten by lowering of arguments in case of // a tail call. if (Flags.isByVal()) { - unsigned VARegSize, VARegSaveSize; - computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); - VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); - unsigned Bytes = Flags.getByValSize() - VARegSize; + // LOCALMOD-BEGIN (PR11018) + unsigned Bytes = Flags.getByValSize(); + // @LOCALMOD-END if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), false); @@ -4898,7 +5126,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { "unexpected types for extended operands to VMULL"); return DAG.getNode(NewOpc, DL, VT, Op0, Op1); } - + // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during // isel lowering to take advantage of no-stall back to back vmul + vmla. // vmull q0, d4, d6 @@ -4917,6 +5145,39 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } +// @LOCALMOD-START +// An EH_RETURN is the result of lowering llvm.eh.return.i32 which in turn is +// generated from __builtin_eh_return (offset, handler) +// The effect of this is to adjust the stack pointer by "offset" +// and then branch to "handler". +SDValue ARMTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + const { + MachineFunction &MF = DAG.getMachineFunction(); + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store stack offset in R2, jump target in R3, dummy return value in R0 + // The dummy return value is needed to make the use-def chains happy, + // because the EH_RETURN instruction uses the isReturn attribute, which + // means preceding code needs to define the return register (R0 on ARM). 
+ http://code.google.com/p/nativeclient/issues/detail?id=2643 + unsigned OffsetReg = ARM::R2; + unsigned AddrReg = ARM::R3; + unsigned ReturnReg = ARM::R0; + Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); + Chain = DAG.getCopyToReg(Chain, dl, AddrReg, Handler); + Chain = DAG.getCopyToReg(Chain, dl, ReturnReg, DAG.getIntPtrConstant(0)); + return DAG.getNode(ARMISD::EH_RETURN, dl, + MVT::Other, + Chain, + DAG.getRegister(OffsetReg, MVT::i32), + DAG.getRegister(AddrReg, getPointerTy())); + } +// @LOCALMOD-END + + + static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { // Convert to float @@ -5162,7 +5423,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalAddress: + case ISD::JumpTable: return LowerJumpTable(Op, DAG); // @LOCALMOD + case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -5181,6 +5443,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + // @LOCALMOD-START + // The exact semantics of this ISD are not completely clear. + // LLVM seems to always point the fp just past the pushed ra and the old fp, + // i.e. two register slots after the beginning of the stack frame. + // It is not clear what happens when there is no frame pointer, + // but llvm, unlike gcc, seems to always force one when this node is + // encountered.
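For reference, this lowering serves the GCC/Clang builtin named in the comment above; a minimal illustration (the wrapper function is invented, only the builtin itself is real):

    #include <stdint.h>
    // Adjust sp by `offset`, then transfer control to `handler`; never returns.
    void install_context(intptr_t offset, void *handler) {
      __builtin_eh_return(offset, handler);
    }

With FRAME_TO_ARGS_OFFSET fixed at 2*4 just below, the assumed ARM frame shape is: fp points at the saved old fp, fp+4 holds the saved return address, and the incoming argument area begins at fp+8.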
+ case ISD::FRAME_TO_ARGS_OFFSET: return DAG.getIntPtrConstant(2*4); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + // @LOCALMOD-END + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); @@ -5211,6 +5484,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_THREAD_STACK_PADDING: + return LowerNaClThreadStackPadding(Op, DAG); + case ISD::NACL_TP_ALIGN: return LowerNaClTpAlign(Op, DAG); + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + case ISD::NACL_TARGET_ARCH: return LowerNaClTargetArch(Op, DAG); + // @LOCALMOD-END } } @@ -6391,7 +6672,11 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) .addReg(VReg1) .addImm(LoopSize >> 16)); - } else { + } else if (FlagSfiDisableCP) { // @LOCALMOD-START + BuildMI(BB, dl, TII->get(ARM::MOVi32imm)) + .addReg(varEnd, RegState::Define) + .addImm(LoopSize); + } else { // @LOCALMOD-END MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); @@ -9120,6 +9405,16 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (Subtarget->isThumb1Only()) return false; + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + ((FlagSfiLoad && N->getOpcode() == ISD::LOAD) || + (FlagSfiStore && N->getOpcode() == ISD::STORE)); + if (restrict_addressing_modes_for_nacl) { + return false; + } + // @LOCALMOD-END + EVT VT; SDValue Ptr; bool isSEXTLoad = false; @@ -9158,7 +9453,15 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SelectionDAG &DAG) const { if (Subtarget->isThumb1Only()) return false; - + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + ((FlagSfiLoad && N->getOpcode() == ISD::LOAD) || + (FlagSfiStore && N->getOpcode() == ISD::STORE)); + if (restrict_addressing_modes_for_nacl) { + return false; + } + // @LOCALMOD-END EVT VT; SDValue Ptr; bool isSEXTLoad = false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 7ad48b9b53..1f2ace4082 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -40,10 +40,13 @@ namespace llvm { WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable - + // @LOCALMOD-START + WrapperJT2, // like WrapperJT but without the UID + WrapperGOT, // A Wrapper node for GOT addresses + EH_RETURN, // For LowerEH_RETURN + // @LOCALMOD-END // Add pseudo op to model memcpy for struct byval. COPY_STRUCT_BYVAL, - CALL, // Function call. CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. 
@@ -428,6 +431,16 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const; + // @LOCALMOD-START + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClThreadStackPadding(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpAlign(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 50ae826a38..611d9194fd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -88,6 +88,14 @@ def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; +// @LOCALMOD-START +// Support non-inline jumptables. +// We do not use the extra uid immediate that comes with ARMWrapperJT. +// TODO(robertm): figure out what it is used for +def ARMWrapperJT2 : SDNode<"ARMISD::WrapperJT2", SDTIntUnaryOp>; +// Support for MOVW/MOVT'ing the GOT address directly into a register. +def ARMWrapperGOT : SDNode<"ARMISD::WrapperGOT", SDTPtrLeaf>; +// @LOCALMOD-END def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; @@ -241,6 +249,11 @@ def UseFusedMAC : Predicate<"!TM.Options.NoExcessFPPrecision && " def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " "Subtarget->isTargetDarwin()">; +// @LOCALMOD-BEGIN +def UseConstPool : Predicate<"Subtarget->useConstPool()">; +def DontUseConstPool : Predicate<"!Subtarget->useConstPool()">; +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -768,7 +781,8 @@ def postidx_reg : Operand<i32> { // use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } def addrmode2 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode2", []> { + ComplexPattern<i32, 3, "SelectAddrMode2", [], + [SDNPWantRoot]> { // @LOCALMOD let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; let ParserMatchClass = AddrMode2AsmOperand; @@ -808,7 +822,8 @@ def am2offset_imm : Operand<i32>, // FIXME: split into imm vs. reg versions.
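A note on the [SDNPWantRoot] additions here and on the addrmode3 operand that follows: handing the root node to SelectAddrMode2/SelectAddrMode3 lets the address selector see whether it is serving a load or a store, so the two-register forms can be refused under the SFI flags, mirroring the getPreIndexedAddressParts/getPostIndexedAddressParts changes earlier in this patch. A condensed sketch of the check (assumed shape, not the patch's exact code):

    // Inside SelectAddrMode2 (sketch): refuse reg+reg addressing for
    // sandboxed memory ops so a single base register can be masked.
    if ((FlagSfiLoad && Root->getOpcode() == ISD::LOAD) ||
        (FlagSfiStore && Root->getOpcode() == ISD::STORE))
      return false;  // selector falls back to base + immediate forms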
def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } def addrmode3 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode3", []> { + ComplexPattern<i32, 3, "SelectAddrMode3", [], + [SDNPWantRoot]> { // @LOCALMOD let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; let ParserMatchClass = AddrMode3AsmOperand; @@ -1529,6 +1544,46 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii, // Instructions //===----------------------------------------------------------------------===// +// @LOCALMOD-START + +// New ARM SFI Model +include "ARMInstrNaCl.td" + +// Older Macro base SFI Model +def SFI_GUARD_LOADSTORE : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +let Defs = [CPSR] in +def SFI_GUARD_LOADSTORE_TST : +PseudoInst<(outs GPR:$dst), (ins GPR:$a), NoItinerary, []>; + +// Like SFI_GUARD_LOADSTORE, but reserved for loads into SP. +def SFI_GUARD_SP_LOAD : +PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>; + +def SFI_GUARD_INDIRECT_CALL : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +def SFI_GUARD_INDIRECT_JMP : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +def SFI_GUARD_CALL : +PseudoInst<(outs), (ins pred:$p), NoItinerary, []>; + +// NOTE: the BX_RET instruction hardcodes lr as well +def SFI_GUARD_RETURN : +PseudoInst<(outs), (ins pred:$p), NoItinerary, []>; + +def SFI_NOP_IF_AT_BUNDLE_END : +PseudoInst<(outs), (ins), NoItinerary, []>; + +// Note: intention is that $src and $dst are the same register. +def SFI_DATA_MASK : +PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>; + +// @LOCALMOD-END + + //===----------------------------------------------------------------------===// // Miscellaneous Instructions. // @@ -1849,6 +1904,33 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // SP is marked as a use to prevent stack-pointer assignments that appear // immediately before calls from potentially appearing dead. +// @LOCALMOD-START +// Exception handling related Node and Instructions. +// The conversion sequence is: +// ISD::EH_RETURN -> ARMISD::EH_RETURN -> +// ARMeh_return -> (stack change + indirect branch) +// +// ARMeh_return takes the place of regular return instruction +// but takes two arguments. +// R2, R3 are used for storing the offset and return address respectively. +def SDT_ARMEHRET : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisPtrTy<1>]>; + +def ARMehret : SDNode<"ARMISD::EH_RETURN", SDT_ARMEHRET, + [SDNPHasChain, SDNPOptInGlue]>; + + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + Defs = [SP], + Uses = [SP] in { + def ARMeh_return : PseudoInst<(outs), + (ins GPR:$spadj, GPR:$dst), + IIC_Br, + [(ARMehret GPR:$spadj, GPR:$dst)]>, + Requires<[IsARM]>; +} +// @LOCALMOD-END + + let isCall = 1, // FIXME: Do we really need a non-predicated version? If so, it should // at least be a pseudo instruction expanding to the predicated version @@ -2937,6 +3019,69 @@ def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), } // Constraints +// @LOCALMOD-BEGIN +// PIC / PC-relative versions of MOVi16/MOVTi16, which have an extra +// operand representing the ID of the PICADD instruction that corrects +// for relativity. This is used to materialize addresses into +// a register in a PC-relative manner. +// +// E.g. 
Rather than have an absolute address in $imm, and transferred to +// a register with: +// movw $Rd, :lower16:$imm +// movt $Rd, :upper16:$imm +// +// we will instead have a relative offset: +// movw $Rd, :lower16:$imm - ($pic_add_id + 8) +// ... +// movt $Rd, :upper16:$imm - ($pic_add_id + 8) +// ... +// $pic_add_id: +// add $Rd, pc, $Rd +// +// One way these pseudo instructions (and the corresponding PICADD) +// come about is during expansion of the MOVi32imm pseudo instruction +// (see ARMExpandPseudo::ExpandMBB). +// These pseudo instructions become real instructions when they are +// finally lowered to MCInsts (e.g., at ARMAsmPrinter::EmitInstruction), +// and the extra pclabel ID becomes part of the appropriate operand. +// +// NOTE: aside from adding the pclabel operand, all other operands should +// be the same as the non-PIC versions to simplify conversion to the +// non-pseudo instructions. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, + neverHasSideEffects = 1 in +def MOVi16PIC : PseudoInst<(outs GPR:$Rd), (ins imm0_65535_expr:$imm, + pclabel:$pic_add_id, + pred:$p), + IIC_iMOVi, + []>, + Requires<[IsARM, HasV6T2]>, UnaryDP; + +let Constraints = "$src = $Rd" in +def MOVTi16PIC : PseudoInst<(outs GPR:$Rd), (ins GPR:$src, + imm0_65535_expr:$imm, + pclabel:$pic_add_id, + pred:$p), + IIC_iMOVi, + []>, + UnaryDP, Requires<[IsARM, HasV6T2]>; +// @LOCALMOD-END + +// @LOCALMOD-BEGIN +// Pseudo-instruction that will be expanded into MOVW / MOVT (PIC versions) w/ +// GOT as the operand. +// The alternative is to create a constant pool entry with the (relative) +// GOT address and load from the constant pool. This is currently used +// when constant islands are turned off, since MOVW / MOVT will be faster. +let isReMaterializable = 1, isMoveImm = 1, neverHasSideEffects = 1 in +def MOVGOTAddr : PseudoInst<(outs GPR:$dst), (ins), + IIC_iMOVix2, // will expand to two MOVi's + []>, + Requires<[IsARM, UseMovt]>; + +def : ARMPat<(ARMWrapperGOT), (MOVGOTAddr)>; +// @LOCALMOD-END + def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, Requires<[IsARM, HasV6T2]>; @@ -3044,6 +3189,8 @@ def UBFX : I<(outs GPR:$Rd), // Arithmetic Instructions. // + + defm ADD : AsI1_bin_irs<0b0100, "add", IIC_iALUi, IIC_iALUr, IIC_iALUsr, BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>; @@ -4794,9 +4941,20 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), // ConstantPool, GlobalAddress, and JumpTable def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, Requires<[IsARM, DontUseMovt]>; -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +// @LOCALMOD-START +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>, + Requires<[IsARM, DontUseMovt]>; +// @LOCALMOD-END def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, Requires<[IsARM, UseMovt]>; +// @LOCALMOD-START +def : ARMPat<(ARMWrapper tconstpool :$dst), (MOVi32imm tconstpool :$dst)>, + Requires<[IsARM, UseMovt, DontUseConstPool]>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>, + Requires<[IsARM, UseMovt, UseConstPool]>; +def : ARMPat<(ARMWrapperJT2 tjumptable :$dst), (MOVi32imm tjumptable :$dst)>, + Requires<[IsARM, UseMovt]>; +// @LOCALMOD-END def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), (LEApcrelJT tjumptable:$dst, imm:$id)>; @@ -5142,3 +5300,47 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // 'it' blocks in ARM mode just validate the predicates. 
The IT itself // is discarded. def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; + +// @LOCALMOD-BEGIN +//===----------------------------------------------------------------------===// +// NativeClient intrinsics +// These provide the ability to implement several low-level features without +// having to link native ASM code on the client. +// This code has to be kept in sync with include/llvm/Intrinsics.td and +// lib/Target/X86InstrNaCl.{td, cpp}. +// TODO(sehr): conditionalize this on IsNaCl64 | IsNaCl32 | IsNaClArm. + +let Uses = [R0], Defs = [R0] in { + // Saves all the callee-saves registers, sp, and lr to the JMP_BUF structure + // pointed to by r0. The JMP_BUF structure is the maximum size over all + // supported architectures. + def NACL_SETJ : AXI<(outs), (ins), + MiscFrm, NoItinerary, + // Bundle start + "sfi_nop_if_at_bundle_end; " + "sfi_data_mask r0; " + "stmia r0!, {{r4, r5, r6, r7, r8, r10, r11, sp, lr}}; " + "mov r0, #0; ", + [(set R0, (int_nacl_setjmp R0, LR))]>; +} + +let isBranch = 1, isBarrier = 1, isTerminator = 1, Uses = [R0, R1] in { + // Restores all the callee-saves registers, sp, and lr from the JMP_BUF + // structure pointed to by r0. Returns the value in r1 at entry. This + // implements the tail of longjmp, with the normalization of the return value + // (if the caller passes zero to longjmp, it should return 1) done in the + // caller. + def NACL_LONGJ : AXI<(outs), (ins), MiscFrm, NoItinerary, + // Bundle start + "ldmia r0!, {{r4, r5, r6, r7, r8, r10, r11, r12, lr}}; " + "sfi_nop_if_at_bundle_end; " + "mov sp, r12; " + "sfi_data_mask sp; " + "movs r0, r1; " + "moveq r0, #1; " + "sfi_nop_if_at_bundle_end; " + "sfi_code_mask lr; " + "bx lr; ", + [(int_nacl_longjmp R0, R1)]>; +} +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index e3a1c8c8f4..58119baea5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3540,12 +3540,24 @@ def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, Requires<[IsThumb2, DontUseMovt]>; +// @LOCALMOD-START +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>, + Requires<[IsThumb2, DontUseMovt]>; +// @LOCALMOD-END def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, Requires<[IsThumb2, UseMovt]>; def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (t2LEApcrelJT tjumptable:$dst, imm:$id)>; +// @LOCALMOD-START +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2MOVi32imm tconstpool :$dst)>, + Requires<[IsThumb2, UseMovt, DontUseConstPool]>; +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>, + Requires<[IsThumb2, UseMovt, UseConstPool]>; +def : T2Pat<(ARMWrapperJT2 tjumptable :$dst), (t2MOVi32imm tjumptable :$dst)>, + Requires<[IsThumb2, UseMovt]>; +// @LOCALMOD-END // Pseudo instruction that combines ldr from constpool and add pc. 
This should // be expanded into two instructions late to allow if-conversion and diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index cb1b2a2172..6f055c53a9 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -546,6 +546,12 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, break; } + // @LOCALMOD-BEGIN + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2575 + if (MI->hasOptionalDef()) + return false; + // @LOCALMOD-END + // Make sure the offset fits in 8 bits. if (Bytes == 0 || (Limit && Bytes >= Limit)) return false; @@ -579,6 +585,12 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, break; } + // @LOCALMOD-BEGIN + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2575 + if (MI->hasOptionalDef()) + return false; + // @LOCALMOD-END + if (Bytes == 0 || (Limit && Bytes >= Limit)) // Make sure the offset fits in 8 bits. return false; @@ -709,6 +721,7 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, /// ldmia rn, <ra, rb, rc> /// => /// ldmdb rn!, <ra, rb, rc> +/// @LOCALMOD This is especially useful for rn == sp bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, @@ -1389,7 +1402,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// mov pc, lr /// => /// ldmfd sp!, {..., pc} +// @LOCALMOD for sfi we do not want this to happen bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // @LOCALMOD-START + // For NaCl, do not load into PC directly for a return, since NaCl requires + // masking the address first. + if (STI->isTargetNaCl()) { + return false; + } + // @LOCALMOD-END + if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index e2ac9a466e..3dd0848058 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -123,3 +123,57 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, OutMI.addOperand(MCOp); } } + +// @LOCALMOD-BEGIN +// Unlike LowerARMMachineInstrToMCInst, the opcode has already been set. +// Otherwise, this is like LowerARMMachineInstrToMCInst, but with special +// handling where the "immediate" is PC Relative +// (used for MOVi16PIC / MOVTi16PIC, etc. -- see .td file) +void llvm::LowerARMMachineInstrToMCInstPCRel(const MachineInstr *MI, + MCInst &OutMI, + ARMAsmPrinter &AP, + unsigned ImmIndex, + unsigned PCIndex, + MCSymbol *PCLabel, + unsigned PCAdjustment) { + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (i == ImmIndex) { + MCContext &Ctx = AP.OutContext; + const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, Ctx); + if (PCAdjustment) { + const MCExpr *AdjExpr = MCConstantExpr::Create(PCAdjustment, Ctx); + PCRelExpr = MCBinaryExpr::CreateAdd(PCRelExpr, AdjExpr, Ctx); + } + + // Get the usual symbol operand, then subtract the PCRelExpr. 
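The expression assembled below has the shape sym - (PCLabel + PCAdjustment), re-wrapped in whatever ARM-specific modifier was on the original operand; schematically (illustrative only):

    // Expr = :lower16:( sym - (.LPC0_0 + 8) )   e.g. for a MOVi16PIC operand
    // emitted as:  movw r0, :lower16:sym - (.LPC0_0 + 8)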
+ const MachineOperand &MOImm = MI->getOperand(ImmIndex); + MCOperand SymOp; + bool DidLower = AP.lowerOperand(MOImm, SymOp); + assert (DidLower && "Immediate-like operand should have been lowered"); + + const MCExpr *Expr = SymOp.getExpr(); + ARMMCExpr::VariantKind TargetKind = ARMMCExpr::VK_ARM_None; + /* Unwrap and rewrap the ARMMCExpr */ + if (Expr->getKind() == MCExpr::Target) { + const ARMMCExpr *TargetExpr = cast<ARMMCExpr>(Expr); + TargetKind = TargetExpr->getKind(); + Expr = TargetExpr->getSubExpr(); + } + Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, Ctx); + if (TargetKind != ARMMCExpr::VK_ARM_None) { + Expr = ARMMCExpr::Create(TargetKind, Expr, Ctx); + } + MCOperand MCOp = MCOperand::CreateExpr(Expr); + OutMI.addOperand(MCOp); + } else if (i == PCIndex) { // dummy index already handled as PCLabel + continue; + } else { + MCOperand MCOp; + if (AP.lowerOperand(MI->getOperand(i), MCOp)) { + OutMI.addOperand(MCOp); + } + } + } +} +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 31d5d38d84..59fd484db3 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -146,7 +146,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { // Use default for non AAPCS (or Darwin) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) + if (Subtarget->isTargetNaCl() || !Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) // @LOCALMOD return SDValue(); const ARMTargetLowering &TLI = diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index db6512c9b9..73c77eb131 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -23,13 +23,22 @@ using namespace llvm; -static cl::opt<bool> +cl::opt<bool> // @LOCALMOD ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); static cl::opt<bool> DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); +// @LOCALMOD-START +// TODO: * JITing has not been tested at all +// * Thumb mode operation is also not clear: it seems jump tables +// for thumb are broken independent of this option +static cl::opt<bool> +NoInlineJumpTables("no-inline-jumptables", + cl::desc("Do not place jump tables inline in the code")); +// @LOCALMOD-END + static cl::opt<bool> StrictAlign("arm-strict-align", cl::Hidden, cl::desc("Disallow all unaligned memory accesses")); @@ -58,6 +67,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, , NoARM(false) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) + , UseInlineJumpTables(!NoInlineJumpTables) // @LOCALMOD , UseMovt(false) , SupportsTailCall(false) , HasFP16(false) @@ -116,6 +126,12 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } + // @LOCALMOD-BEGIN + // NaCl uses MovT to avoid generating constant islands. 
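Concretely, forcing movw/movt below turns what would have been a constant-island load into an immediate pair, e.g. (illustrative encodings):

    // ldr  r0, .LCPI0_0         @ before: needs a pool entry in .text
    // movw r0, #:lower16:imm    @ after: pool-free replacement pair
    // movt r0, #:upper16:imm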
+ if (isTargetNaCl() && !useConstPool()) + UseMovt = true; + // @LOCALMOD-END + if (!isThumb() || hasThumb2()) PostRAScheduler = true; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e72b06fa3f..a83f4e57cf 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -23,6 +23,14 @@ #define GET_SUBTARGETINFO_HEADER #include "ARMGenSubtargetInfo.inc" +// @LOCALMOD-BEGIN +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + + namespace llvm { class GlobalValue; class StringRef; @@ -158,6 +166,12 @@ protected: /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; + // @LOCALMOD-START + /// UseInlineJumpTables - True if jump tables should be in-line in the code. + bool UseInlineJumpTables; + // @LOCALMOD-END + + public: enum { isELF, isDarwin @@ -247,6 +261,9 @@ protected: bool useMovt() const { return UseMovt && hasV6T2Ops(); } bool supportsTailCall() const { return SupportsTailCall; } + // @LOCALMOD + bool useConstPool() const { return !FlagSfiDisableCP; } + bool allowsUnalignedMem() const { return AllowsUnalignedMem; } const std::string & getCPUString() const { return CPUString; } @@ -270,6 +287,8 @@ protected: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + + bool useInlineJumpTables() const {return UseInlineJumpTables;} // @LOCALMOD }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 9aa8308920..99ed63293d 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,13 @@ EnableGlobalMerge("global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); +// @LOCALMOD-START +namespace llvm { +cl::opt<bool> FlagSfiDisableCP("sfi-disable-cp", + cl::desc("disable arm constant island pools")); +} +// @LOCALMOD-END + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -189,8 +196,24 @@ bool ARMPassConfig::addPreEmitPass() { addPass(UnpackMachineBundlesID); } + // @LOCALMOD-START + // Note with FlagSfiDisableCP we effectively disable the + // ARMConstantIslandPass and rely on movt/movw to eliminate the need + // for constant islands + if (FlagSfiDisableCP) { + assert(getARMSubtarget().useMovt()); + } + // @LOCALMOD-END + PM->add(createARMConstantIslandPass()); + // @LOCALMOD-START + // This pass does all the heavy sfi lifting. 
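Pass ordering matters here: the constant-island decision, or its -sfi-disable-cp bypass that the assert above ties to movw/movt availability, is settled before the NaCl rewrite pass added just below runs as the final pre-emission step. A hypothetical invocation exercising this path (the triple spelling is assumed):

    llc -mtriple=armv7-none-nacl-gnueabi -sfi-disable-cp foo.ll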
+ if (getARMSubtarget().isTargetNaCl()) { + PM->add(createARMNaClRewritePass()); + } + // @LOCALMOD-END + return true; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index abcdb24c0c..4da778cd79 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -29,6 +29,13 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/OwningPtr.h" +// @LOCALMOD-START +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index a5ea1c202e..00c495b89a 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -38,7 +38,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); - LSDASection = NULL; + //LSDASection = NULL; } AttributesSection = diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 9a2aab5304..92c5d92ff7 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -34,6 +34,8 @@ add_llvm_target(ARMCodeGen ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMNaClHeaders.cpp + ARMNaClRewritePass.cpp ARMRegisterInfo.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 70e4317cef..9eda04d776 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -166,6 +166,71 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } + // @LOCALMOD-BEGIN + // TODO(pdox): Kill this code once we switch to MC object emission + const char *SFIInst = NULL; + unsigned SFIEmitDest = ~0; + unsigned SFIEmitPred = ~0; + switch (Opcode) { + case ARM::SFI_NOP_IF_AT_BUNDLE_END : + SFIInst = "sfi_nop_if_at_bundle_end"; + SFIEmitDest = ~0; + SFIEmitPred = ~0; + break; + case ARM::SFI_GUARD_LOADSTORE : + SFIInst = "sfi_load_store_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_INDIRECT_CALL: + SFIInst = "sfi_indirect_call_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_INDIRECT_JMP : + SFIInst = "sfi_indirect_jump_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_DATA_MASK : + SFIInst = "sfi_data_mask"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_LOADSTORE_TST: + SFIInst = "sfi_cload_store_preamble"; + SFIEmitDest = 0; + SFIEmitPred = ~0; + break; + case ARM::SFI_GUARD_CALL : + SFIInst = "sfi_call_preamble"; + SFIEmitDest = ~0; + SFIEmitPred = 0; + break; + case ARM::SFI_GUARD_RETURN : + SFIInst = "sfi_return_preamble lr,"; + SFIEmitDest = ~0; + SFIEmitPred = 0; + break; + } + if (SFIInst) { + O << '\t' << SFIInst; + if (SFIEmitDest != ~0) { + O << ' '; + printOperand(MI, SFIEmitDest, O); + } + if (SFIEmitDest != ~0 && SFIEmitPred != ~0) { + O << ','; + } + if (SFIEmitPred != ~0) { + O << ' '; + printPredicateOperand(MI, SFIEmitPred, O); + } + O << '\n'; + return; + } + // @LOCALMOD-END + if (Opcode == ARM::tLDMIA) { bool Writeback = true; unsigned BaseReg = MI->getOperand(0).getReg(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index ac6ce642df..d8d8d53a57 100644 --- 
a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMMCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -232,8 +233,16 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint32_t nopEncoding = hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; + // @LOCALMOD-BEGIN-UPSTREAM + // FIXME: e1a00000 vs e320f000 + // e1a00000 is mov r0, r0 which may result in a stall + // but the real nop instruction is not available on early hw.... + // Perhaps this really needs to be switched on the Subtarget?? + // GNU as likes to emit e320f000... for (uint64_t i = 0; i != NumNops; ++i) - OW->Write32(nopEncoding); + OW->Write32(0xe320f000); // regular NOP + // @LOCALMOD-END + // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? switch (Count % 4) { @@ -559,13 +568,31 @@ namespace { class ELFARMAsmBackend : public ARMAsmBackend { public: uint8_t OSABI; + Triple::OSType OSType; // @LOCALMOD: kept OSTYPE vs upstream. FIXME: remove. ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t _OSABI) - : ARMAsmBackend(T, TT), OSABI(_OSABI) { } + uint8_t _OSABI, + Triple::OSType _OSType) + : ARMAsmBackend(T, TT), OSABI(_OSABI), OSType(_OSType) { } void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const; + // @LOCALMOD-BEGIN + // FIXME! NaCl should INHERIT from ELFARMAsmBackend, not + // add to it. + unsigned getBundleSize() const { + return (OSType == Triple::NativeClient) ? 16 : 0; + } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + if (OSType == Triple::NativeClient) { + return CustomExpandInstNaClARM(Inst, Out); + } + return false; + } + + // @LOCALMOD-END + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createARMELFObjectWriter(OS, OSABI); } @@ -694,5 +721,5 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT) { assert(0 && "Windows not supported on ARM"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI); + return new ELFARMAsmBackend(T, TT, OSABI, TheTriple.getOS()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index aa649badaf..8dee1b1d6a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -71,10 +71,11 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, const MCFixup &Fixup, bool IsPCRel) const { const MCSymbol &Symbol = Target.getSymA()->getSymbol().AliasedSymbol(); + const MCSymbol &ASymbol = Symbol.AliasedSymbol(); bool EmitThisSym = false; const MCSectionELF &Section = - static_cast<const MCSectionELF&>(Symbol.getSection()); + static_cast<const MCSectionELF&>(ASymbol.getSection()); bool InNormalSection = true; unsigned RelocType = 0; RelocType = GetRelocTypeInner(Target, Fixup, IsPCRel); @@ -137,9 +138,9 @@ const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, } if (EmitThisSym) - return &Symbol; + return &ASymbol; if (! 
Symbol.isTemporary() && InNormalSection) { - return &Symbol; + return &ASymbol; } return NULL; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index d32805e522..832d1394bc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -58,4 +58,9 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { // Exceptions handling if (EnableARMEHABI) ExceptionsType = ExceptionHandling::ARM; + + // @LOCALMOD-BEGIN + // Exceptions handling + ExceptionsType = ExceptionHandling::DwarfCFI; + // @LOCALMOD-END } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 5df84c8b10..e581cc82fa 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -78,10 +78,22 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { // features. if (NoCPU) // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; + // @LOCALMOD-BEGIN + // Orig: ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; + // TODO(pdox): Eliminate this strange exception, possibly + // with our own cpu tag. (neon doesn't work, but vfp2 does). + // We also don't seem to handle The DSP features. + ARMArchFeature = "+v7,+db,+vfp2"; + // @LOCALMOD-END else // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; + // @LOCALMOD-BEGIN + // Orig: ARMArchFeature = "+v7"; + // TODO(pdox): Eliminate this strange exception, possibly + // with our own cpu tag. (neon doesn't work, but vfp2 does). + // We also don't seem to handle The DSP features. + ARMArchFeature = "+v7,+db,+vfp2"; + // @LOCALMOD-END } } else if (SubVer == '6') { if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') @@ -146,7 +158,16 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { if (TheTriple.isOSDarwin()) return new ARMMCAsmInfoDarwin(); - return new ARMELFMCAsmInfo(); + // @LOCALMOD-BEGIN + ARMELFMCAsmInfo *MAI = new ARMELFMCAsmInfo(); + if (TheTriple.getOS() == Triple::NativeClient) { + // Initial state of the frame ARM:SP points to cfa + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(ARM::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + } + return MAI; + // @LOCALMOD-END } static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index 256599412e..3ee853c822 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMARMDesc ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp + ARMMCNaCl.cpp # LOCALMOD ARMMCTargetDesc.cpp ARMMachObjectWriter.cpp ARMELFObjectWriter.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 9b4caf65cb..b8fe772544 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -14,6 +14,7 @@ #include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MCTargetDesc/MipsMCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" @@ -74,7 +75,10 @@ public: :MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle), Is64Bit(_is64Bit) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return 
createMipsELFObjectWriter(OS, OSType, IsLittle, Is64Bit); + // @LOCALMOD-BEGIN-UPSTREAM + return createMipsELFObjectWriter(OS, + MCELFObjectTargetWriter::getOSABI(OSType), IsLittle, Is64Bit); + // @LOCALMOD-END-UPSTREAM } /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided @@ -206,8 +210,33 @@ public: /// /// \return - True on success. bool writeNopData(uint64_t Count, MCObjectWriter *OW) const { + // @LOCALMOD-START + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0x0); // regular NOP + + switch (Count % 4) { + case 0: break; // No leftover bytes to write + default: + return false; // TODO(rtrk): Should we handle this differently? + } + // @LOCALMOD-END return true; } + + // @LOCALMOD-BEGIN + // FIXME! NaCl should INHERIT from MipsAsmBackend, not add to it. + unsigned getBundleSize() const { + return (OSType == Triple::NativeClient) ? 16 : 0; + } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + if (OSType == Triple::NativeClient) { + return CustomExpandInstNaClMips(Inst, Out); + } + return false; + } + // @LOCALMOD-END }; // class MipsAsmBackend } // namespace diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index 6353da3c9d..3b0e59b87a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -159,6 +159,12 @@ getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups) const { const MCOperand &MO = MI.getOperand(OpNo); + + /* LOCALMOD-START */ + // If the destination is an immediate, we have nothing to do. + if (MO.isImm()) return (unsigned)MO.getImm() / 4; + /* LOCALMOD-END */ + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions"); const MCExpr *Expr = MO.getExpr(); diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 5ce2347144..ed61b642fc 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -18,6 +18,16 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" +/* @LOCALMOD-START */ +namespace llvm { + +namespace Mips { + extern unsigned LoadStoreStackMaskReg; + extern unsigned IndirectBranchMaskReg; +} +} // End llvm namespace +/* @LOCALMOD-END */ + namespace llvm { class MipsTargetMachine; class FunctionPass; @@ -27,6 +37,10 @@ namespace llvm { FunctionPass *createMipsJITCodeEmitterPass(MipsTargetMachine &TM, JITCodeEmitter &JCE); + // @LOCALMOD-START + FunctionPass *createMipsNaClRewritePass(); + // @LOCALMOD-END + } // end namespace llvm; #endif diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 8cc645f73f..b09c51179a 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -195,13 +195,24 @@ const char *MipsAsmPrinter::getCurrentABIString() const { } void MipsAsmPrinter::EmitFunctionEntryLabel() { - if (OutStreamer.hasRawTextSupport()) { + // @LOCALMOD-START + // make sure function entry is aligned. We use XmagicX as our basis + // for alignment decisions (c.f. assembler sfi macros). 
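MachineFunction alignment here is a log2 value, so clamping it to at least 4 below pins every function entry to a 2^4 = 16-byte boundary, the start of a NaCl bundle. As a one-line sanity check (illustrative):

    static_assert((1 << 4) == 16, "minimum entry alignment = one NaCl bundle");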
+ int alignment = MF->getAlignment(); + if (alignment < 4) alignment = 4; + EmitAlignment(alignment); + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { if (Subtarget->inMips16Mode()) OutStreamer.EmitRawText(StringRef("\t.set\tmips16")); else OutStreamer.EmitRawText(StringRef("\t.set\tnomips16")); // leave out until FSF available gas has micromips changes // OutStreamer.EmitRawText(StringRef("\t.set\tnomicromips")); + OutStreamer.EmitRawText(StringRef("\t.set XmagicX, .\n")); + } + // @LOCALMOD-END + + if (OutStreamer.hasRawTextSupport()) { OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); } OutStreamer.EmitLabel(CurrentFnSym); @@ -455,6 +466,10 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } +// @LOCALMOD-START +extern void EmitMipsSFIHeaders(raw_ostream &O); +// @LOCALMOD-END + void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. @@ -476,7 +491,35 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // return to previous section if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t.previous")); + + // @LOCALMOD-START + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + std::string str; + raw_string_ostream OS(str); + EmitMipsSFIHeaders(OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + // @LOCALMOD-END +} + +// @LOCALMOD-START +unsigned MipsAsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 4; + } + } + return 0; } +// @LOCALMOD-END MachineLocation MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index 562bf9ce00..a426f55ba7 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -74,6 +74,10 @@ public: void EmitStartOfAsmFile(Module &M); virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + // @LOCALMOD-START + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const; + // @LOCALMOD-END }; } diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 62f7cdea3c..b1ac73579f 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -282,7 +282,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { if (VT.getSizeInBits() / 8 > LS->getAlignment()) { assert(TLI.allowsUnalignedMemoryAccesses(VT) && "Unaligned loads/stores not supported for this type."); - if (VT == MVT::f32) + if (VT == MVT::f32 && !Subtarget.isTargetNaCl()/*@LOCALMOD*/) return false; } } @@ -346,7 +346,7 @@ SelectAddr(SDNode *Parent, SDValue Addr, SDValue &Base, SDValue &Offset) { // If an indexed floating point load/store can be emitted, return false. 
if (LS && (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) && - Subtarget.hasMips32r2Or64()) + Subtarget.hasMips32r2Or64() && !Subtarget.isTargetNaCl()/*@LOCALMOD*/) return false; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1793a0fa21..04d4743b35 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -288,6 +288,15 @@ MipsTargetLowering(MipsTargetMachine &TM) setTruncStoreAction(MVT::i64, MVT::i32, Custom); } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_THREAD_STACK_PADDING, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_ALIGN, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + } + // @LOCALMOD-END + setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); setTargetDAGCombine(ISD::SDIVREM); @@ -313,7 +322,7 @@ bool MipsTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { case MVT::i32: return true; case MVT::f32: - return Subtarget->hasMips32r2Or64(); + return Subtarget->hasMips32r2Or64() && !Subtarget->isTargetNaCl()/*@LOCALMOD*/; default: return false; } @@ -781,6 +790,14 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const case ISD::SRL_PARTS: return LowerShiftRightParts(Op, DAG, false); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); + + // @LOCALMOD-BEGIN + case ISD::NACL_THREAD_STACK_PADDING: + return LowerNaClThreadStackPadding(Op, DAG); + case ISD::NACL_TP_ALIGN: return LowerNaClTpAlign(Op, DAG); + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + // @LOCALMOD-END } return SDValue(); } @@ -1633,6 +1650,35 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, ValTy, Load, Lo); } +// @LOCALMOD-BEGIN + +// NaCl TLS setup / layout intrinsics. 
+// See: native_client/src/untrusted/nacl/tls_params.h + +SDValue MipsTargetLowering::LowerNaClThreadStackPadding(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue MipsTargetLowering::LowerNaClTpAlign(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getConstant(4, Op.getValueType().getSimpleVT()); +} + +SDValue MipsTargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue MipsTargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} +// @LOCALMOD-END + SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -1647,6 +1693,34 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const TLSModel::Model model = getTargetMachine().getTLSModel(GV); + // @LOCALMOD-BEGIN + if (getTargetMachine().getSubtarget<MipsSubtarget>().isTargetNaCl()) { + SDVTList VTs = DAG.getVTList(MVT::i32); + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_HI); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_LO); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, VTs, &TGAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, TGALo); + SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + + SDValue TlsGetAddr = DAG.getExternalSymbol("__tls_get_addr", PtrVT); + + ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + LowerCallTo(DAG.getEntryNode(), + (Type *) Type::getInt32Ty(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + false, true, TlsGetAddr, Args, DAG, dl); + + SDValue ThreadPointer = CallResult.first; + return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); + } + // @LOCALMOD-END + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 740cf06406..5342e37f28 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -150,6 +150,13 @@ namespace llvm { SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-BEGIN + SDValue LowerNaClThreadStackPadding(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpAlign(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 0e954a8727..29bd2dc494 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -277,25 +277,26 @@ let Predicates = [NotN64, NotMips64, HasStandardEncoding] in { } // Indexed loads and stores. 
-let Predicates = [HasMips32r2Or64, HasStandardEncoding] in { +let Predicates = [HasMips32r2Or64, NotNaCl/*@LOCALMOD*/] in { def LWXC1 : FPIdxLoad<0x0, "lwxc1", FGR32, CPURegs, load_a>; def LUXC1 : FPIdxLoad<0x5, "luxc1", FGR32, CPURegs, load_u>; def SWXC1 : FPIdxStore<0x8, "swxc1", FGR32, CPURegs, store_a>; def SUXC1 : FPIdxStore<0xd, "suxc1", FGR32, CPURegs, store_u>; } -let Predicates = [HasMips32r2, NotMips64, HasStandardEncoding] in { +let Predicates = [HasMips32r2, NotMips64, NotNaCl/*@LOCALMOD*/] in { def LDXC1 : FPIdxLoad<0x1, "ldxc1", AFGR64, CPURegs, load_a>; def SDXC1 : FPIdxStore<0x9, "sdxc1", AFGR64, CPURegs, store_a>; } -let Predicates = [HasMips64, NotN64, HasStandardEncoding], DecoderNamespace="Mips64" in { +let Predicates = [HasMips64, NotN64, NotNaCl/*@LOCALMOD*/], + DecoderNamespace="Mips64" in { def LDXC164 : FPIdxLoad<0x1, "ldxc1", FGR64, CPURegs, load_a>; def SDXC164 : FPIdxStore<0x9, "sdxc1", FGR64, CPURegs, store_a>; } // n64 -let Predicates = [IsN64, HasStandardEncoding], isCodeGenOnly=1 in { +let Predicates = [IsN64, NotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in { def LWXC1_P8 : FPIdxLoad<0x0, "lwxc1", FGR32, CPU64Regs, load_a>; def LUXC1_P8 : FPIdxLoad<0x5, "luxc1", FGR32, CPU64Regs, load_u>; def LDXC164_P8 : FPIdxLoad<0x1, "ldxc1", FGR64, CPU64Regs, load_a>; @@ -459,13 +460,13 @@ let Predicates = [IsFP64bit, HasStandardEncoding] in { } // Patterns for unaligned floating point loads and stores. -let Predicates = [HasMips32r2Or64, NotN64, HasStandardEncoding] in { +let Predicates = [HasMips32r2Or64, NotN64, NotNaCl/*@LOCALMOD*/] in { def : Pat<(f32 (load_u CPURegs:$addr)), (LUXC1 CPURegs:$addr, ZERO)>; def : Pat<(store_u FGR32:$src, CPURegs:$addr), (SUXC1 FGR32:$src, CPURegs:$addr, ZERO)>; } -let Predicates = [IsN64, HasStandardEncoding] in { +let Predicates = [IsN64, NotNaCl/*@LOCALMOD*/] in { def : Pat<(f32 (load_u CPU64Regs:$addr)), (LUXC1_P8 CPU64Regs:$addr, ZERO_64)>; def : Pat<(store_u FGR32:$src, CPU64Regs:$addr), (SUXC1_P8 FGR32:$src, CPU64Regs:$addr, ZERO_64)>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index a9af4e65df..60343293e8 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -171,6 +171,8 @@ def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">, AssemblerPredicate<"FeatureMips32">; def HasStandardEncoding : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; +def IsNaCl : Predicate<"Subtarget.isTargetNaCl()">; +def NotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; //===----------------------------------------------------------------------===// // Instruction format superclass @@ -829,6 +831,37 @@ class SCBase<bits<6> Opc, string opstring, RegisterClass RC, Operand Mem> : // Pseudo instructions //===----------------------------------------------------------------------===// +// @LOCALMOD-START + +// Older Macro based SFI Model +def SFI_GUARD_LOADSTORE : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_load_store_preamble\t$dst, $src1, $src2", []>; + +def SFI_GUARD_INDIRECT_CALL : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_call_preamble\t$dst, $src1, $src2", []>; + +def SFI_GUARD_INDIRECT_JMP : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_jump_preamble\t$dst, $src1, $src2", []>; + +def SFI_GUARD_CALL : +MipsPseudo<(outs), (ins), "sfi_call_preamble", []>; + +def SFI_GUARD_RETURN : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, 
CPURegs:$src2), + "sfi_return_preamble\t$dst, $src1, $src2", []>; + +def SFI_NOP_IF_AT_BUNDLE_END : +MipsPseudo<(outs), (ins), "sfi_nop_if_at_bundle_end", []>; + +def SFI_DATA_MASK : +MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_data_mask\t$dst, $src1, $src2", []>; + +// @LOCALMOD-END + // As stack alignment is always done with addiu, we need a 16-bit immediate let Defs = [SP], Uses = [SP] in { def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins uimm16:$amt), diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 161762ccf8..0475777eac 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -162,6 +162,50 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { // "addiu $2, $2, %lo(_gp_disp)" void MipsMCInstLower::LowerSETGP01(SmallVector<MCInst, 4>& MCInsts) { MCOperand RegOpnd = MCOperand::CreateReg(Mips::V0); + MCInst Instr4, Mask1, Mask2; // @LOCALMOD + // @LOCALMOD-START + MCOperand MaskReg = MCOperand::CreateReg(Mips::LoadStoreStackMaskReg); + // @LOCALMOD-END + + // @LOCALMOD-START + if (AsmPrinter.TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) { + Mask1.setOpcode(Mips::SFI_GUARD_LOADSTORE); + Mask1.addOperand(Base); + Mask1.addOperand(Base); + Mask1.addOperand(MaskReg); + + Mask2.setOpcode(Mips::SFI_GUARD_LOADSTORE); + Mask2.addOperand(Base); + Mask2.addOperand(Base); + Mask2.addOperand(MaskReg); + if (Opc == Mips::ULW || Opc == Mips::USW || Opc == Mips::ULHu) { + // FIXME: ULHu should be rewritten because it uses mips32r2 instr. INS + MCInsts.push_back(Mask1); + MCInsts.push_back(Instr1); + MCInsts.push_back(Mask2); + MCInsts.push_back(Instr2); + if (!TwoInstructions) MCInsts.push_back(Instr3); + return; + } else if (Opc == Mips::ULH) { + MCInsts.push_back(Mask1); + MCInsts.push_back(Instr1); + MCInsts.push_back(Mask2); + MCInsts.push_back(Instr2); + MCInsts.push_back(Instr3); + MCInsts.push_back(Instr4); + return; + } else if (Opc == Mips::USH) { + MCInsts.push_back(Mask1); + MCInsts.push_back(Instr1); + MCInsts.push_back(Instr2); + MCInsts.push_back(Mask2); + MCInsts.push_back(Instr3); + return; + } else { + llvm_unreachable("unaligned instruction not sandboxed"); + } + } + // @LOCALMOD-END StringRef SymName("_gp_disp"); const MCSymbol *Sym = Ctx->GetOrCreateSymbol(SymName); const MCSymbolRefExpr *MCSym; diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5fdeda92ec..203cd9031c 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -85,7 +85,8 @@ BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const uint16_t ReservedCPURegs[] = { Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, - Mips::SP, Mips::RA + Mips::SP, Mips::RA, + Mips::T6, Mips::T7, Mips::T8 // @LOCALMOD: reserved for PNaCl use }; static const uint16_t ReservedCPU64Regs[] = { diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index f072802db6..835ac6d05b 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -31,6 +31,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), HasSwap(false), HasBitCount(false), InMips16Mode(false) + // @LOCALMOD-START + , TargetTriple(TT) + // @LOCALMOD-END { std::string CPUName = CPU; if 
(CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 3215c44be0..4f520d77cc 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -91,6 +91,8 @@ protected: InstrItineraryData InstrItins; + Triple TargetTriple; // @LOCALMOD + public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -139,6 +141,13 @@ public: bool hasMinMax() const { return HasMinMax; } bool hasSwap() const { return HasSwap; } bool hasBitCount() const { return HasBitCount; } + + // @LOCALMOD-BEGIN + bool isTargetNaCl() const { + return TargetTriple.getOS() == Triple::NativeClient; + } + // @LOCALMOD-END + }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 7ba610e3b1..8b67572348 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -125,6 +125,14 @@ bool MipsPassConfig::addInstSelector() { // print out the code after the passes. bool MipsPassConfig::addPreEmitPass() { PM->add(createMipsDelaySlotFillerPass(getMipsTargetMachine())); + + // @LOCALMOD-START + if (getMipsSubtarget().isTargetNaCl()) { + // This pass does all the heavy sfi lifting. + PM->add(createMipsNaClRewritePass()); + } + // @LOCALMOD-END + return true; } diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 04dc60aa6b..e91b2d811f 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -37,6 +37,23 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHF_WRITE |ELF::SHF_ALLOC, SectionKind::getBSS()); + // @LOCALMOD-BEGIN + // Without this the linker defined symbols __fini_array_start and + // __fini_array_end do not have useful values. c.f.: + // http://code.google.com/p/nativeclient/issues/detail?id=805 + if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) { + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + // @LOCALMOD-END } // A address must be loaded from a small section if its size is less than the @@ -65,6 +82,12 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (Subtarget.isLinux()) return false; + // @LOCALMOD-BEGIN + // Do not use small section for NaCl. + if (Subtarget.isTargetNaCl()) + return false; + // @LOCALMOD-END + // Only global variables, not functions. const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); if (!GVA) diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index b9b2526876..e785d330ae 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -25,6 +25,7 @@ using namespace llvm; namespace llvm { bool HasDivModLibcall; bool AsmVerbosityDefault(false); + bool TLSUseCall; // @LOCALMOD } static cl::opt<bool> @@ -35,6 +36,20 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +// @LOCALMOD-BEGIN +// Use a function call to get the thread pointer for TLS accesses, +// instead of using inline code.
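// A minimal usage sketch (an illustration assuming llc's normal cl::opt
// handling, not part of the patch): the option below surfaces as
//   llc -mtriple=x86_64-unknown-nacl -mtls-use-call foo.ll -o foo.s
// and its storage is the llvm::TLSUseCall global declared above, which
// X86TargetLowering::LowerGlobalTLSAddress tests to choose between the
// inline %fs/%gs sequence and the call-based LowerToTLSExecCall path.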
+static cl::opt<bool, true> +EnableTLSUseCall("mtls-use-call", + cl::desc("Use a function call to get the thread pointer for TLS accesses."), + cl::location(TLSUseCall), + cl::init(false)); + +static cl::opt<bool> + ForceTLSNonPIC("force-tls-non-pic", + cl::desc("Force TLS to use non-PIC models"), + cl::init(false)); +// @LOCALMOD-END //--------------------------------------------------------------------------- // TargetMachine Class @@ -83,6 +98,7 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { bool isHidden = GV->hasHiddenVisibility(); if (getRelocationModel() == Reloc::PIC_ && + !ForceTLSNonPIC && // @LOCALMOD !Options.PositionIndependentExecutable) { if (isLocal || isHidden) return TLSModel::LocalDynamic; diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index f612e2365e..5b402da3ad 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -27,6 +27,7 @@ set(sources X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp + X86NaClRewritePass.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp X86Subtarget.cpp diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 1c240e52a3..8be0c5e6d7 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMX86Desc X86MCTargetDesc.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp + X86MCNaCl.cpp # LOCALMOD X86MachObjectWriter.cpp X86ELFObjectWriter.cpp X86WinCOFFObjectWriter.cpp diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 32e40febd2..fb1ba12a52 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -327,8 +328,10 @@ namespace { class ELFX86AsmBackend : public X86AsmBackend { public: uint8_t OSABI; - ELFX86AsmBackend(const Target &T, uint8_t _OSABI) - : X86AsmBackend(T), OSABI(_OSABI) { + Triple::OSType OSType; // @LOCALMOD: kept OSTYPE vs upstream. FIXME: remove. + ELFX86AsmBackend(const Target &T, uint8_t _OSABI, + Triple::OSType _OSType) + : X86AsmBackend(T), OSABI(_OSABI), OSType(_OSType) { HasReliableSymbolDifference = true; } @@ -336,12 +339,28 @@ public: const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section); return ES.getFlags() & ELF::SHF_MERGE; } + + // @LOCALMOD-BEGIN + // FIXME! NaCl should inherit from ELFX86AsmBackend! + unsigned getBundleSize() const { + return OSType == Triple::NativeClient ? 
32 : 0; + } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + if (OSType == Triple::NativeClient) { + return CustomExpandInstNaClX86(Inst, Out); + } + return false; + } + // @LOCALMOD-END + }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { public: - ELFX86_32AsmBackend(const Target &T, uint8_t OSABI) - : ELFX86AsmBackend(T, OSABI) {} + ELFX86_32AsmBackend(const Target &T, uint8_t OSABI, + Triple::OSType OSType) // @LOCALMOD: kept OSType + : ELFX86AsmBackend(T, OSABI, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86ELFObjectWriter(OS, /*Is64Bit*/ false, OSABI); @@ -350,8 +369,9 @@ public: class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: - ELFX86_64AsmBackend(const Target &T, uint8_t OSABI) - : ELFX86AsmBackend(T, OSABI) {} + ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, + Triple::OSType OSType) // @LOCALMOD: kept OSType + : ELFX86AsmBackend(T, OSABI, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { return createX86ELFObjectWriter(OS, /*Is64Bit*/ true, OSABI); @@ -449,7 +469,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT) { return new WindowsX86AsmBackend(T, false); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ELFX86_32AsmBackend(T, OSABI); + return new ELFX86_32AsmBackend(T, OSABI, TheTriple.getOS()); } MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { @@ -462,5 +482,5 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT) { return new WindowsX86AsmBackend(T, true); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); - return new ELFX86_64AsmBackend(T, OSABI); + return new ELFX86_64AsmBackend(T, OSABI, TheTriple.getOS()); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 10046b29b1..c3f46ebda0 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -291,6 +291,8 @@ namespace X86II { /// manual, this operand is described as pntr16:32 and pntr16:16 RawFrmImm16 = 44, + CustomFrm = 62, // @LOCALMOD + FormMask = 63, //===------------------------------------------------------------------===// @@ -542,6 +544,7 @@ namespace X86II { case X86II::MRMSrcReg: case X86II::RawFrmImm8: case X86II::RawFrmImm16: + case X86II::CustomFrm: // @LOCALMOD return -1; case X86II::MRMDestMem: return 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 49c07f3b37..5b44481e90 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -71,8 +71,18 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) void X86ELFMCAsmInfo::anchor() { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { - if (T.getArch() == Triple::x86_64) - PointerSize = 8; + + // @LOCALMOD-BEGIN + if (T.getArch() == Triple::x86_64) { + if (T.getOS() == Triple::NativeClient) { + PointerSize = 4; + StackSlotSize = 8; + } else { + PointerSize = 8; + StackSlotSize = 8; + } + } + // @LOCALMOD-END AssemblerDialect = AsmWriterFlavor; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 12f1961ed8..ec5b92e317 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1006,6 +1006,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, 
llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: llvm_unreachable("Pseudo instruction shouldn't be emitted"); + // @LOCALMOD-BEGIN + case X86II::CustomFrm: + assert(0 && "CustomFrm instruction shouldn't be emitted"); + // @LOCALMOD-END case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index bf05ccfe99..3883812d8c 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -47,6 +47,11 @@ FunctionPass *createCleanupLocalDynamicTLSPass(); /// FunctionPass *createX86FloatingPointStackifierPass(); +// @LOCALMOD-BEGIN - Creates a pass to make instructions follow NaCl SFI rules. +FunctionPass* createX86NaClRewritePass(); +FunctionPass* createX86NaClRewriteFinalPass(); +// @LOCALMOD-END + /// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions /// before each call to avoid transition penalty between functions encoded with /// AVX and SSE. diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index d30c8df164..ee66e7ce1c 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -72,6 +72,35 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } +// @LOCALMOD-BEGIN +bool X86AsmPrinter::UseReadOnlyJumpTables() const { + return Subtarget->isTargetNaCl(); +} + +unsigned X86AsmPrinter::GetTargetBasicBlockAlign() const { + if (Subtarget->isTargetNaCl()) + return 5; + return 0; +} + +unsigned X86AsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 5; + } + } + return 0; +} +// @LOCALMOD-END + /// printSymbolOperand - Print a raw symbol reference operand. This handles /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index a6ed9ba006..9f61c59a13 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -45,7 +45,13 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual void EmitEndOfAsmFile(Module &M); virtual void EmitInstruction(const MachineInstr *MI); - + + virtual bool UseReadOnlyJumpTables() const; // @LOCALMOD + + virtual unsigned GetTargetBasicBlockAlign() const; // @LOCLAMOD + + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const;//@LOCALMOD + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. 
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 3079dfa7cf..271384ab27 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-emitter" +#define DEBUG_TYPE "jit" #include "X86InstrInfo.h" #include "X86JITInfo.h" #include "X86Subtarget.h" @@ -35,6 +35,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetOpcodes.h" // @LOCALMOD using namespace llvm; STATISTIC(NumEmitted, "Number of machine instructions emitted"); @@ -1114,6 +1115,28 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, unsigned Opcode = Desc->Opcode; + // @LOCALMOD-START + if (TM.getSubtargetImpl()->isTargetNaCl()) { + switch (Opcode) { + case TargetOpcode::BUNDLE_LOCK: + MCE.beginBundleLock(); + return; + case TargetOpcode::BUNDLE_UNLOCK: + MCE.endBundleLock(); + return; + case TargetOpcode::BUNDLE_ALIGN_START: + MCE.alignToBundleBeginning(); + return; + case TargetOpcode::BUNDLE_ALIGN_END: + MCE.alignToBundleEnd(); + return; + } + // In addition to groups of instructions, each instruction must itself be + // bundle-locked because they are emitted with multiple calls into MCE + MCE.beginBundleLock(); + } + // @LOCALMOD-END + // If this is a two-address instruction, skip one of the register operands. unsigned NumOps = Desc->getNumOperands(); unsigned CurOp = 0; @@ -1470,5 +1493,11 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, llvm_unreachable(0); } + // @LOCALMOD-START + if (TM.getSubtargetImpl()->isTargetNaCl()) { + MCE.endBundleLock(); + } + // @LOCALMOD-END + MCE.processDebugLoc(MI.getDebugLoc(), false); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 07d0e7647a..0b213e9a90 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1793,10 +1793,21 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (CalleeOp) { // Register-indirect call. unsigned CallOpc; - if (Subtarget->is64Bit()) - CallOpc = X86::CALL64r; - else - CallOpc = X86::CALL32r; + // @LOCALMOD-BEGIN + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL64r; + } else { + CallOpc = X86::CALL64r; + } + } else { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL32r; + } else { + CallOpc = X86::CALL32r; + } + } + // @LOCALMOD-END MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addReg(CalleeOp); @@ -1804,10 +1815,21 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Direct call. assert(GV && "Not a direct call"); unsigned CallOpc; - if (Subtarget->is64Bit()) - CallOpc = X86::CALL64pcrel32; - else - CallOpc = X86::CALLpcrel32; + // @LOCALMOD-BEGIN + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL64pcrel32; + } else { + CallOpc = X86::CALL64pcrel32; + } + } else { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALLpcrel32; + } else { + CallOpc = X86::CALLpcrel32; + } + } + // @LOCALMOD-END // See if we need any target-specific flags on the GV operand. 
unsigned char OpFlags = 0; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 244f9bbfaf..af9efbd906 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -114,6 +114,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: + case X86::NACL_CG_TCRETURNdi64: // @LOCALMOD + case X86::NACL_CG_TCRETURNri64: // @LOCALMOD case X86::TCRETURNmi64: case X86::EH_RETURN: case X86::EH_RETURN64: { @@ -317,7 +319,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, bool HasFP = hasFP(MF); // Calculate amount of bytes used for return address storing. - int stackGrowth = -TD->getPointerSize(); + int stackGrowth = -TM.getFrameLowering()->getStackSlotSize(); // @LOCALMOD // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. @@ -717,7 +719,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { std::vector<MachineMove> &Moves = MMI.getFrameMoves(); const TargetData *TD = MF.getTarget().getTargetData(); uint64_t NumBytes = 0; - int stackGrowth = -TD->getPointerSize(); + int stackGrowth = -TM.getFrameLowering()->getStackSlotSize(); // @LOCALMOD if (HasFP) { // Calculate required stack adjustment. @@ -985,6 +987,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: + case X86::NACL_CG_TCRETURNdi64: // @LOCALMOD + case X86::NACL_CG_TCRETURNri64: // @LOCALMOD case X86::EH_RETURN: case X86::EH_RETURN64: break; // These are ok @@ -1085,6 +1089,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::NACL_CG_TCRETURNri64 || // @LOCALMOD + RetOpcode == X86::NACL_CG_TCRETURNdi64 || // @LOCALMOD RetOpcode == X86::TCRETURNmi64) { bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; // Tail call return: adjust the stack pointer and jump to callee. @@ -1110,10 +1116,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { + if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::NACL_CG_TCRETURNdi64) { // @LOCALMOD + // @LOCALMOD-BEGIN + unsigned TailJmpOpc; + switch (RetOpcode) { + case X86::TCRETURNdi : TailJmpOpc = X86::TAILJMPd; break; + case X86::TCRETURNdi64: TailJmpOpc = X86::TAILJMPd64; break; + case X86::NACL_CG_TCRETURNdi64: + TailJmpOpc = X86::NACL_CG_TAILJMPd64; + break; + default: llvm_unreachable("Unexpected return opcode"); + } + // @LOCALMOD-END MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? X86::TAILJMPd : X86::TAILJMPd64)); + BuildMI(MBB, MBBI, DL, TII.get(TailJmpOpc)); // @LOCALMOD + if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1131,6 +1149,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). 
addReg(JumpTarget.getReg(), RegState::Kill); +// @LOCALMOD-BEGIN + } else if (RetOpcode == X86::NACL_CG_TCRETURNri64) { + BuildMI(MBB, MBBI, DL, TII.get(X86::NACL_CG_TAILJMPr64)). + addReg(JumpTarget.getReg(), RegState::Kill); +// @LOCALMOD-END } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index dc515dc39c..d46c41f508 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -29,7 +29,8 @@ public: explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti) : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), - (sti.is64Bit() ? -8 : -4)), + (sti.is64Bit() ? -8 : -4), + 1, (sti.is64Bit() ? 8 : 4)), // @LOCALMOD TM(tm), STI(sti) { } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d381f3da4b..e9ec6c1522 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -41,6 +41,7 @@ using namespace llvm; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); + //===----------------------------------------------------------------------===// // Pattern Matcher Implementation //===----------------------------------------------------------------------===// @@ -94,7 +95,7 @@ namespace { return RegNode->getReg() == X86::RIP; return false; } - + void setBaseReg(SDValue Reg) { BaseType = RegBase; Base_Reg = Reg; @@ -212,6 +213,10 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); + // @LOCALMOD-BEGIN + void LegalizeIndexForNaCl(SDValue N, X86ISelAddressMode &AM); + // @LOCALMOD-END + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, @@ -229,8 +234,9 @@ namespace { inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { + EVT MemOpVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; // @LOCALMOD Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, MemOpVT) : // @LOCALMOD AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; @@ -288,6 +294,15 @@ namespace { const X86InstrInfo *getInstrInfo() { return getTargetMachine().getInstrInfo(); } + + // @LOCALMOD-START + bool selectingMemOp; + bool RestrictUseOfBaseReg() { + return selectingMemOp && Subtarget->isTargetNaCl64(); + } + // @LOCALMOD-END + + }; } @@ -431,6 +446,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && + !Subtarget->isTargetNaCl() && // @LOCALMOD: We can't fold load/call (N->getOpcode() == X86ISD::CALL || N->getOpcode() == X86ISD::TC_RETURN)) { /// Also try moving call address load from outside callseq_start to just @@ -461,7 +477,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { ++NumLoadMoved; continue; } - + // Lower fpround and fpextend nodes that target the FP stack to be store and // load to the stack. This is a gross hack. 
We would like to simply mark // these as being illegal, but when we do that, legalize produces these when @@ -583,7 +599,15 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); - + + // @LOCALMOD-START + // Disable this tls access optimization in Native Client, since + // gs:0 (or fs:0 on X86-64) does not exactly contain its own address. + if (Subtarget->isTargetNaCl()) { + return true; + } + // @LOCALMOD-END + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -700,6 +724,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { if (MatchAddressRecursively(N, AM, 0)) return true; + + if (!RestrictUseOfBaseReg()) { // @LOCALMOD // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && @@ -708,7 +734,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } - + } // @LOCALMOD + // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, // because it has a smaller encoding. // TODO: Which other code models can use this? @@ -1055,6 +1082,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // FALL THROUGH case ISD::MUL: case X86ISD::MUL_IMM: + // @LOCALMOD + if (!RestrictUseOfBaseReg()) { // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() == 0 && @@ -1087,6 +1116,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; } } + } // @LOCALMOD break; case ISD::SUB: { @@ -1173,6 +1203,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; AM = Backup; + if (!RestrictUseOfBaseReg()) { // @LOCALMOD // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least // the add. @@ -1185,6 +1216,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.Scale = 1; return false; } + } // @LOCALMOD N = Handle.getValue(); break; } @@ -1244,7 +1276,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the /// specified addressing mode without any further recursion. bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { - // Is the base register already occupied? + if (RestrictUseOfBaseReg()) { // @LOCALMOD + if (AM.IndexReg.getNode() == 0) { + AM.IndexReg = N; + AM.Scale = 1; + return false; + } + return true; + } // @LOCALMOD +// Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. 
if (AM.IndexReg.getNode() == 0) { @@ -1274,6 +1314,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; + // @LOCALMOD + selectingMemOp = true; if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand @@ -1293,7 +1335,16 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (MatchAddress(N, AM)) return false; - EVT VT = N.getValueType(); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) { + // NaCl needs to zero the top 32-bits of the index, so we can't + // allow the index register to be negative. + LegalizeIndexForNaCl(N, AM); + } + // @LOCALMOD-END + + EVT VT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; // @LOCALMOD + if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base_Reg.getNode()) AM.Base_Reg = CurDAG->getRegister(0, VT); @@ -1303,6 +1354,32 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, AM.IndexReg = CurDAG->getRegister(0, VT); getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + + // @LOCALMOD-BEGIN + // For Native Client 64-bit, zero-extend 32-bit pointers + // to 64-bits for memory operations. Most of the time, this + // won't generate any additional instructions because the backend + // knows that operations on 32-bit registers implicitly zero-extends. + // If we don't do this, there are a few corner cases where LLVM might + // assume the upper bits won't be modified or used, but since we + // always clear the upper bits, this is not a good assumption. + // http://code.google.com/p/nativeclient/issues/detail?id=1564 + if (Subtarget->isTargetNaCl64()) { + assert(Base.getValueType() == MVT::i64 && "Unexpected base operand size"); + + if (Index.getValueType() != MVT::i64) { + Index = CurDAG->getZExtOrTrunc(Index, Index.getDebugLoc(), MVT::i64); + // Insert the new node into the topological ordering. + if (Parent && + (Index->getNodeId() == -1 || + Index->getNodeId() > Parent->getNodeId())) { + CurDAG->RepositionNode(Parent, Index.getNode()); + Index->setNodeId(Parent->getNodeId()); + } + } + } + // @LOCALMOD-END + return true; } @@ -1365,6 +1442,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, SDValue Copy = AM.Segment; SDValue T = CurDAG->getRegister(0, MVT::i32); AM.Segment = T; + // @LOCALMOD + selectingMemOp = false; if (MatchAddress(N, AM)) return false; assert (T == AM.Segment); @@ -1428,7 +1507,8 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); AM.SymbolFlags = GA->getTargetFlags(); - if (N.getValueType() == MVT::i32) { + if (N.getValueType() == MVT::i32 && + !Subtarget->isTargetNaCl64()) { // @LOCALMOD AM.Scale = 1; AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); } else { @@ -1453,6 +1533,111 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, N.getOperand(1), Base, Scale, Index, Disp, Segment); } +// @LOCALMOD-BEGIN +// LegalizeIndexForNaCl - NaCl specific addressing fix +// +// Because NaCl needs to zero the top 32-bits of the index, we can't +// allow the index register to be negative. However, if we are using a base +// frame index, global address or the constant pool, and AM.Disp > 0, then +// negative values of "index" may be expected to legally occur. +// To avoid this, we fold the displacement (and scale) back into the +// index. This results in a LEA before the current instruction. 
+// Unfortunately, this may add a requirement for an additional register. +// +// For example, this sandboxed code is broken if %eax is negative: +// +// movl %eax,%eax +// incl -30(%rbp,%rax,4) +// +// Instead, we now generate: +// leal -30(%rbp,%rax,4), %tmp +// movl %tmp,%tmp +// incl (%r15,%tmp,1) +// +// TODO(espindola): This might not be complete since the matcher can select +// any dag node to go in the index. This is also not how the rest of the +// matcher logic works, if the matcher selects something, it must be +// valid and not depend on further patching. A more desirable fix is +// probably to update the matching code to avoid assigning a register +// to a value that we cannot prove is positive. +void X86DAGToDAGISel::LegalizeIndexForNaCl(SDValue N, X86ISelAddressMode &AM) { + + + if (AM.isRIPRelative()) + return; + + // MatchAddress wants to use the base register when there's only + // one register and no scale. We need to use the index register instead. + if (AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() && + !AM.IndexReg.getNode()) { + AM.IndexReg = AM.Base_Reg; + AM.setBaseReg(SDValue()); + } + + // Case 1: Prevent negative indexes + bool NeedsFixing1 = + (AM.BaseType == X86ISelAddressMode::FrameIndexBase || AM.GV || AM.CP) && + AM.IndexReg.getNode() && + AM.Disp > 0; + + // Case 2: Both index and base registers are being used + bool NeedsFixing2 = + (AM.BaseType == X86ISelAddressMode::RegBase) && + AM.Base_Reg.getNode() && + AM.IndexReg.getNode(); + + if (!NeedsFixing1 && !NeedsFixing2) + return; + + DebugLoc dl = N->getDebugLoc(); + static const unsigned LogTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + assert(AM.Scale < sizeof(LogTable)/sizeof(LogTable[0])); + unsigned ScaleLog = LogTable[AM.Scale]; + assert(ScaleLog <= 3); + SmallVector<SDNode*, 8> NewNodes; + + SDValue NewIndex = AM.IndexReg; + if (ScaleLog > 0) { + SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); + NewNodes.push_back(ShlCount.getNode()); + SDValue ShlNode = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), + NewIndex, ShlCount); + NewNodes.push_back(ShlNode.getNode()); + NewIndex = ShlNode; + } + if (AM.Disp > 0) { + SDValue DispNode = CurDAG->getConstant(AM.Disp, N.getValueType()); + NewNodes.push_back(DispNode.getNode()); + + SDValue AddNode = CurDAG->getNode(ISD::ADD, dl, N.getValueType(), + NewIndex, DispNode); + NewNodes.push_back(AddNode.getNode()); + NewIndex = AddNode; + } + + if (NeedsFixing2) { + SDValue AddBase = CurDAG->getNode(ISD::ADD, dl, N.getValueType(), + NewIndex, AM.Base_Reg); + NewNodes.push_back(AddBase.getNode()); + NewIndex = AddBase; + AM.setBaseReg(SDValue()); + } + AM.Disp = 0; + AM.Scale = 1; + AM.IndexReg = NewIndex; + + // Insert the new nodes into the topological ordering. + for (unsigned i=0; i < NewNodes.size(); i++) { + if (NewNodes[i]->getNodeId() == -1 || + NewNodes[i]->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), NewNodes[i]); + NewNodes[i]->setNodeId(N.getNode()->getNodeId()); + } + } +} +// @LOCALMOD-END + /// getGlobalBaseReg - Return an SDNode that returns the value of /// the global base register. Output instructions required to /// initialize the global base register, if necessary. 
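The comment above carries the core of the sandboxing argument: the NaCl x86-64 scheme truncates only the index register to 32 bits, so a scale or positive displacement applied after that truncation can carry a "negative" index outside the sandboxed region, while folding scale and displacement into the index first (the emitted LEA) makes the truncated value equal to the intended offset. A self-contained model of the two evaluation orders (an illustrative sketch only, assuming a 4 GiB region starting at Base; AddrMode here is a plain struct, not LLVM's):

#include <cassert>
#include <cstdint>

struct AddrMode {
  int64_t Disp;   // displacement
  int64_t Scale;  // 1, 2, 4 or 8
  int64_t Index;  // value in the 32-bit index register, sign-extended
  uint64_t Base;  // start of the sandbox's 4 GiB region (e.g. %r15)
};

// Without the fix: truncation hits only the raw index, and scale/disp are
// applied afterwards, as in "incl -30(%rbp,%rax,4)" with %eax negative.
static uint64_t unfoldedAddr(const AddrMode &AM) {
  return AM.Base + uint64_t(uint32_t(AM.Index)) * AM.Scale + AM.Disp;
}

// With the fix: the LEA computes index*scale+disp first, the final access is
// (Base, tmp, 1), and truncation applies to the whole sum.
static uint64_t foldedAddr(const AddrMode &AM) {
  return AM.Base + uint32_t(AM.Index * AM.Scale + AM.Disp);
}

int main() {
  AddrMode AM = { -30, 4, -1, UINT64_C(1) << 32 }; // -30(%rbp,%rax,4), %eax = -1
  uint64_t Lo = AM.Base, Hi = AM.Base + (UINT64_C(1) << 32);
  uint64_t Unfolded = unfoldedAddr(AM), Folded = foldedAddr(AM);
  assert(Unfolded >= Hi);              // escapes the region entirely
  assert(Folded >= Lo && Folded < Hi); // stays inside it
  return 0;
}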
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 74902c69ca..f641940154 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -140,6 +140,11 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { return new TargetLoweringObjectFileMachO(); } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) + return new TargetLoweringObjectFileNaCl(); + // @LOCALMOD-END + if (Subtarget->isTargetELF()) return new TargetLoweringObjectFileELF(); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) @@ -152,7 +157,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; + // @LOCALMOD-START + X86StackPtr = Subtarget->has64BitPointers() ? X86::RSP : X86::ESP; + // @LOCALMOD-END RegInfo = TM.getRegisterInfo(); TD = getTargetData(); @@ -521,7 +528,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - if (Subtarget->is64Bit()) { + if (Subtarget->has64BitPointers()) { setExceptionPointerRegister(X86::RAX); setExceptionSelectorRegister(X86::RDX); } else { @@ -539,7 +546,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit()) { + if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { @@ -551,13 +558,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Custom); else if (TM.Options.EnableSegmentedStacks) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Custom); else - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? 
// @LOCALMOD MVT::i64 : MVT::i32, Expand); if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { @@ -1229,6 +1239,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_THREAD_STACK_PADDING, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_ALIGN, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TARGET_ARCH, MVT::i32, Custom); + } + // @LOCALMOD-END + computeRegisterProperties(); // On Darwin, -Os means optimize for size without hurting performance, @@ -1573,7 +1593,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl()) { + // NaCl 64 uses 32-bit pointers, so there might be some zero-ext needed. + SDValue Zext = DAG.getZExtOrTrunc(Val, dl, MVT::i64); + Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Zext, Flag); + } else { + Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); + } + // @LOCALMOD-END + Flag = Chain.getValue(1); // RAX now acts like a return value. @@ -1827,6 +1856,18 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, Fn->getName() == "main") FuncInfo->setForceFramePointer(true); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) { + FuncInfo->setForceFramePointer(true); + } + // @TODO(pdox): This shouldn't be necessary, but there is a bug + // where hasFP() changes during stack-slot spilling after register + // allocation has allocated ebp. Look into this. + if (Subtarget->isTargetNaCl32()) { + FuncInfo->setForceFramePointer(true); + } + // @LOCALMOD-END + MachineFrameInfo *MFI = MF.getFrameInfo(); bool Is64Bit = Subtarget->is64Bit(); bool IsWindows = Subtarget->isTargetWindows(); @@ -1921,7 +1962,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); unsigned Reg = FuncInfo->getSRetReturnReg(); if (!Reg) { - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); + // @LOCALMOD + Reg = MF.getRegInfo().createVirtualRegister( + getRegClassFor(getPointerTy())); FuncInfo->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); @@ -2636,7 +2679,8 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, unsigned StackAlignment = TFI.getStackAlignment(); uint64_t AlignMask = StackAlignment - 1; int64_t Offset = StackSize; - uint64_t SlotSize = TD->getPointerSize(); + // @LOCALMOD + uint64_t SlotSize = Subtarget->is64Bit() ? 8 : 4; if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) { // Number smaller than 12 so just add the difference. Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask)); @@ -2995,13 +3039,14 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. - uint64_t SlotSize = TD->getPointerSize(); + uint64_t SlotSize = Subtarget->is64Bit() ? 
8 : 4; // @LOCALMOD ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, false); FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, // @LOCALMOD + Subtarget->is64Bit() ? MVT::i64 : MVT::i32); } @@ -7262,7 +7307,8 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { + unsigned char OperandFlags, + unsigned Opcode = X86ISD::TLSADDR) { // @LOCALMOD MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = GA->getDebugLoc(); @@ -7270,16 +7316,12 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, GA->getValueType(0), GA->getOffset(), OperandFlags); - - X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; - if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3); + Chain = DAG.getNode(Opcode, dl, NodeTys, Ops, 3); // @LOCALMOD } else { SDValue Ops[] = { Chain, TGA }; - Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2); + Chain = DAG.getNode(Opcode, dl, NodeTys, Ops, 2); // @LOCALMOD } // TLSADDR will be codegen'ed as call. Inform MFI that function has calls. @@ -7311,6 +7353,52 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, X86::RAX, X86II::MO_TLSGD); } +// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model. +static SDValue +LowerToTLSExecCall(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model, bool is64Bit) { + + // See: http://code.google.com/p/nativeclient/issues/detail?id=1685 + unsigned char TargetFlag; + unsigned Opcode; + if (model == TLSModel::LocalExec) { + TargetFlag = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; + Opcode = X86ISD::TLSADDR_LE; + } else if (model == TLSModel::InitialExec) { + TargetFlag = is64Bit ? X86II::MO_GOTTPOFF : X86II::MO_INDNTPOFF; + Opcode = X86ISD::TLSADDR_IE; + } else { + llvm_unreachable("Unknown TLS model"); + } + + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::EAX, // PtrVT is 32-bit. + TargetFlag, Opcode); +} + +// @LOCALMOD-START +// Lower TLS accesses to a function call, rather than use segment registers. +// Lower ISD::GlobalTLSAddress for NaCl 64 bit. +static SDValue +LowerToTLSNaCl64(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model) { + + // See: http://code.google.com/p/nativeclient/issues/detail?id=1685 + unsigned char TargetFlag; + unsigned Opcode; + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + TargetFlag = X86II::MO_TLSGD; + Opcode = X86ISD::TLSADDR; + } else { + return LowerToTLSExecCall(GA, DAG, PtrVT, model, true); + } + + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::EAX, // PtrVT is 32-bit. 
+ TargetFlag, Opcode); +} +// @LOCALMOD-END + static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, @@ -7325,14 +7413,16 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SDValue Base; if (is64Bit) { Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX, - X86II::MO_TLSLD, /*LocalDynamic=*/true); + X86II::MO_TLSLD, + /*Opcode=*/X86ISD::TLSBASEADDR); // @LOCALMOD } else { SDValue InFlag; SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag); InFlag = Chain.getValue(1); Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, - X86II::MO_TLSLDM, /*LocalDynamic=*/true); + X86II::MO_TLSLDM, + /*Opcode=*/X86ISD::TLSBASEADDR); // @LOCALMOD } // Note: the CleanupLocalDynamicTLSPass will remove redundant computations @@ -7421,6 +7511,11 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { TLSModel::Model model = getTargetMachine().getTLSModel(GV); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) + return LowerToTLSNaCl64(GA, DAG, getPointerTy(), model); + // @LOCALMOD-END + switch (model) { case TLSModel::GeneralDynamic: if (Subtarget->is64Bit()) @@ -7431,9 +7526,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + // @LOCALMOD-START + if (llvm::TLSUseCall) { + return LowerToTLSExecCall(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); + } else { + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), getTargetMachine().getRelocationModel() == Reloc::PIC_); + } + // @LOCALMOD-END } llvm_unreachable("Unknown TLS model."); } @@ -8360,6 +8462,10 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, /// if it's possible. SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const { + // @LOCALMOD: NaCl validator rejects BT, BTS, and BTC. + if (Subtarget->isTargetNaCl()) + return SDValue(); + SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -9203,14 +9309,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); // FIXME: Ensure alignment here - bool Is64Bit = Subtarget->is64Bit(); - EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; + bool Has64BitPointers = Subtarget->has64BitPointers(); // @LOCALMOD + EVT SPTy = Has64BitPointers ? MVT::i64 : MVT::i32; // @LOCALMOD if (getTargetMachine().Options.EnableSegmentedStacks) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (Is64Bit) { + if (Subtarget->is64Bit()) { // @LOCALMOD // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. const Function *F = MF.getFunction(); @@ -9223,7 +9329,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } const TargetRegisterClass *AddrRegClass = - getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32); + getRegClassFor(Has64BitPointers ? 
MVT::i64:MVT::i32); // @LOCALMOD unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, @@ -9232,7 +9338,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops1, 2, dl); } else { SDValue Flag; - unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); + unsigned Reg = (Has64BitPointers ? X86::RAX : X86::EAX); // @LOCALMOD Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); @@ -9269,6 +9375,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // fp_offset (48 - 48 + 8 * 16) // overflow_arg_area (point to parameters coming in memory). // reg_save_area + unsigned PointerSize = TD->getPointerSize(); // @LOCALMOD SmallVector<SDValue, 8> MemOps; SDValue FIN = Op.getOperand(1); // Store gp_offset @@ -9291,7 +9398,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), - getPointerTy()); + getPointerTy()); // @LOCALMOD Store = DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8), false, false, 0); @@ -9299,11 +9406,12 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Store ptr to reg_save_area. FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(8)); + FIN, DAG.getIntPtrConstant(PointerSize)); // @LOCALMOD SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy()); + getPointerTy()); // @LOCALMOD Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, - MachinePointerInfo(SV, 16), false, false, 0); + MachinePointerInfo(SV, 8+PointerSize), // @LOCALMOD + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], MemOps.size()); @@ -9313,7 +9421,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert((Subtarget->isTargetLinux() || - Subtarget->isTargetDarwin()) && + Subtarget->isTargetDarwin() || + Subtarget->isTargetNaCl()) && // @LOCALMOD "Unhandled target in LowerVAARG"); assert(Op.getNode()->getNumOperands() == 4); SDValue Chain = Op.getOperand(0); @@ -9386,11 +9495,68 @@ SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { DebugLoc DL = Op.getDebugLoc(); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, + DAG.getIntPtrConstant(8+2*TD->getPointerSize()), // @LM + TD->getPointerABIAlignment(), // @LOCALMOD + /*isVolatile*/false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } +////////////////////////////////////////////////////////////////////// +// NaCl TLS setup / layout intrinsics. 
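// (A layout sketch inferred from the constants implemented below, mirroring
// the MIPS note earlier in this patch: on x86 the TDB lives at the thread
// pointer itself and the TLS data sits immediately below it,
//   tp + __nacl_tp_tdb_offset(tdb_size) == tp + 0        -> TDB
//   tp + __nacl_tp_tls_offset(tls_size) == tp - tls_size -> TLS data
// with 64-byte alignment and one register-sized slot of stack padding.)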
+// See: native_client/src/untrusted/stubs/tls_params.h +SDValue X86TargetLowering::LowerNaClTpAlign(SDValue Op, + SelectionDAG &DAG) const { + // size_t __nacl_tp_alignment () { + // return 64; + // } + return DAG.getConstant(64, Op.getValueType().getSimpleVT()); +} + +SDValue X86TargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tls_offset (size_t tls_size) { + // return -tls_size; + // } + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + +SDValue X86TargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tdb_offset (size_t tdb_size) { + // return 0; + // } + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue +X86TargetLowering::LowerNaClThreadStackPadding(SDValue Op, + SelectionDAG &DAG) const { + // size_t __nacl_thread_stack_padding () { + // return reg_size; + // } + return DAG.getConstant(RegInfo->getSlotSize(), + Op.getValueType().getSimpleVT()); +} + +SDValue +X86TargetLowering::LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const { + // int __nacl_target_arch () { + // return (is_64_bit ? + // PnaclTargetArchitectureX86_64 : + // PnaclTargetArchitectureX86_32); + // } + return DAG.getConstant((Subtarget->is64Bit() ? + PnaclTargetArchitectureX86_64 : + PnaclTargetArchitectureX86_32), + Op.getValueType().getSimpleVT()); +} + +////////////////////////////////////////////////////////////////////// +
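The five lowerings above fix NaCl's x86 thread-pointer contract as compile-time constants. A minimal sketch (not part of the patch) of how a runtime consumer might apply them, assuming only what the pseudo-C comments state — tp aligned to 64 bytes, the TLS block ending at tp, the TDB sitting at tp itself; the helper names below are illustrative, not taken from the NaCl runtime:

    #include <cstddef>
    #include <cstdint>

    // Place a thread's TLS block and TDB inside 'area' and return the thread
    // pointer tp, per __nacl_tp_alignment(), __nacl_tp_tls_offset(), and
    // __nacl_tp_tdb_offset() as lowered for x86 above.
    static void *place_tls(char *area, size_t area_size,
                           size_t tls_size, size_t tdb_size) {
      // __nacl_tp_alignment() == 64: round tp up past the TLS block.
      uintptr_t tp = ((uintptr_t)area + tls_size + 63) & ~(uintptr_t)63;
      char *tls_start = (char *)tp - tls_size; // tp + __nacl_tp_tls_offset(tls_size)
      char *tdb = (char *)tp;                  // tp + __nacl_tp_tdb_offset(tdb_size)
      // Caller guarantees area_size covers the alignment slack plus tdb_size.
      (void)tls_start; (void)tdb; (void)area_size; (void)tdb_size;
      return (void *)tp;
    }

The sign conventions follow directly from the lowered constants: a negative __nacl_tp_tls_offset places TLS below tp, and a zero __nacl_tp_tdb_offset puts the TDB at tp.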
// getTargetVShiftNode - Handle vector element shifts where the shift amount // may or may not be a constant. Takes immediate version of shift as input. static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, @@ -9788,8 +9954,10 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(TD->getPointerSize(), - Subtarget->is64Bit() ? MVT::i64 : MVT::i32); + // @LOCALMOD-BEGIN + DAG.getConstant(Subtarget->is64Bit() ? 8 : 4, + getPointerTy()); + // @LOCALMOD-END return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), @@ -9809,7 +9977,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); - unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP; + unsigned FrameReg = Subtarget->has64BitPointers() ? X86::RBP : X86::EBP; // @LOCALMOD SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, @@ -9820,7 +9988,10 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { - return DAG.getIntPtrConstant(2*TD->getPointerSize()); + // @LOCALMOD-START + int SlotSize = Subtarget->is64Bit() ? 8 : 4; + return DAG.getIntPtrConstant(2*SlotSize); + // @LOCALMOD-END } SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { @@ -9829,14 +10000,17 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); + // @LOCALMOD-START + bool Has64BitPtrs = Subtarget->has64BitPointers(); SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, - Subtarget->is64Bit() ? X86::RBP : X86::EBP, + Has64BitPtrs ? X86::RBP : X86::EBP, getPointerTy()); - unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX); - + unsigned StoreAddrReg = (Has64BitPtrs ? X86::RCX : X86::ECX); + int SlotSize = Subtarget->is64Bit() ? 8 : 4; SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame, - DAG.getIntPtrConstant(TD->getPointerSize())); + DAG.getIntPtrConstant(SlotSize)); + // @LOCALMOD-END StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset); Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo(), false, false, 0); @@ -10893,6 +11067,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ADD: return LowerADD(Op, DAG); case ISD::SUB: return LowerSUB(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_THREAD_STACK_PADDING: + return LowerNaClThreadStackPadding(Op, DAG); + case ISD::NACL_TP_ALIGN: return LowerNaClTpAlign(Op, DAG); + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + case ISD::NACL_TARGET_ARCH: return LowerNaClTargetArch(Op, DAG); + // @LOCALMOD-END } } @@ -11123,6 +11305,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR"; + case X86ISD::TLSADDR_LE: return "X86ISD::TLSADDR_LE"; // @LOCALMOD + case X86ISD::TLSADDR_IE: return "X86ISD::TLSADDR_IE"; // @LOCALMOD case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; @@ -11833,9 +12017,11 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); // Machine Information + bool IsNaCl = Subtarget->isTargetNaCl(); // @LOCALMOD const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); + const TargetRegisterClass *AddrRegClass = + getRegClassFor(getPointerTy()); // @LOCALMOD const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); DebugLoc DL = MI->getDebugLoc(); @@ -11863,7 +12049,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( MachineBasicBlock *overflowMBB; MachineBasicBlock *offsetMBB; MachineBasicBlock *endMBB; - + unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB unsigned OffsetReg = 0; @@ -11944,29 +12130,39 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( } // In offsetMBB, emit code to use the reg_save_area. + unsigned Opc; // @LOCALMOD if (offsetMBB) { assert(OffsetReg != 0); // Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) + Opc = IsNaCl ? X86::MOV32rm : X86::MOV64rm; // @LOCALMOD + BuildMI(offsetMBB, DL, TII->get(Opc), RegSaveReg) // @LOCALMOD .addOperand(Base) .addOperand(Scale) .addOperand(Index) - .addDisp(Disp, 16) + .addDisp(Disp, 8+TD->getPointerSize()) // @LOCALMOD .addOperand(Segment) .setMemRefs(MMOBegin, MMOEnd); // Zero-extend the offset - unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) - .addImm(0) - .addReg(OffsetReg) - .addImm(X86::sub_32bit); + // @LOCALMOD-BEGIN + unsigned OffsetRegExt; + if (IsNaCl) { + OffsetRegExt = OffsetReg; + } else { + OffsetRegExt = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetRegExt) + .addImm(0) + .addReg(OffsetReg) + .addImm(X86::sub_32bit); + } + // @LOCALMOD-END // Add the offset to the reg_save_area to get the final address. - BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) - .addReg(OffsetReg64) + Opc = IsNaCl ? X86::ADD32rr : X86::ADD64rr; // @LOCALMOD + BuildMI(offsetMBB, DL, TII->get(Opc), OffsetDestReg) + .addReg(OffsetRegExt) // @LOCALMOD .addReg(RegSaveReg); // Compute the offset for the next argument @@ -11996,7 +12192,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( // Load the overflow_area address into a register. unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) + Opc = IsNaCl ? X86::MOV32rm : X86::MOV64rm; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), OverflowAddrReg) .addOperand(Base) .addOperand(Scale) .addOperand(Index) @@ -12012,11 +12209,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) - BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) + Opc = IsNaCl ? X86::ADD32ri : X86::ADD64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), TmpReg) .addReg(OverflowAddrReg) .addImm(Align-1); - BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) + Opc = IsNaCl ? X86::AND32ri : X86::AND64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), OverflowDestReg) .addReg(TmpReg) .addImm(~(uint64_t)(Align-1)); } else { @@ -12027,12 +12226,14 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) + Opc = IsNaCl ? X86::ADD32ri : X86::ADD64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); // Store the new overflow address. - BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) + Opc = IsNaCl ? 
X86::MOV32mr : X86::MOV64mr; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc)) .addOperand(Base) .addOperand(Scale) .addOperand(Index) @@ -12772,6 +12973,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); case X86::VAARG_64: + case X86::NACL_CG_VAARG_64: return EmitVAARG64WithCustomInserter(MI, BB); } } @@ -14291,6 +14493,12 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } unsigned Bits = VT.getSizeInBits(); + // @LOCALMOD-START + // Due to a limitation in NaCl's 32-bit validator, + // 16-bit shld instructions are illegal in 32-bit NaCl. + if (Subtarget->isTargetNaCl() && !Subtarget->is64Bit() && Bits == 16) + return SDValue(); + // @LOCALMOD-END if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2f561a0178..1888a5dba6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -210,6 +210,13 @@ namespace llvm { // TLSBASEADDR - Thread Local Storage. A call to get the start address // of the TLS block for the current module. TLSBASEADDR, + // @LOCALMOD-BEGIN + // TLSADDR_LE - Thread Local Storage. (Local Exec Model) + TLSADDR_LE, + + // TLSADDR_IE - Thread Local Storage. (Initial Exec Model) + TLSADDR_IE, + // @LOCALMOD-END // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. @@ -424,6 +431,7 @@ namespace llvm { //===--------------------------------------------------------------------===// // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { + public: explicit X86TargetLowering(X86TargetMachine &TM); @@ -792,11 +800,19 @@ namespace llvm { SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const; + // @LOCALMOD-BEGIN + SDValue LowerNaClThreadStackPadding(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpAlign(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + // Utility functions to help LowerVECTOR_SHUFFLE SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; - + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 0eee083393..3cfbf52d7d 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -32,8 +32,9 @@ def LEA64_32r : I<0x8D, MRMSrcMem, [(set GR32:$dst, lea32addr:$src)], IIC_LEA>, Requires<[In64BitMode]>; +// @LOCALMOD (lea64mem) let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)], IIC_LEA>; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 99c2b8f955..43b7c2596d 100644 --- 
a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -92,8 +92,8 @@ def VAARG_64 : I<0, Pseudo, "#VAARG_64 $dst, $ap, $size, $mode, $align", [(set GR64:$dst, (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), - (implicit EFLAGS)]>; - + (implicit EFLAGS)]>, + Requires<[NotNaCl]>; // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows // targets. These calls are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to @@ -379,7 +379,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, - Requires<[In32BitMode]>; + Requires<[In32BitMode, NotNaCl]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, @@ -1013,9 +1013,9 @@ def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotNaCl]>; def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; + (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotNaCl]>; // tailcall stuff def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), @@ -1027,7 +1027,7 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode, IsNotPIC]>; + Requires<[In32BitMode, IsNotPIC, NotNaCl]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, @@ -1039,27 +1039,27 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; // Normal calls, with various flavors of addresses. def : Pat<(X86call (i32 tglobaladdr:$dst)), - (CALLpcrel32 tglobaladdr:$dst)>; + (CALLpcrel32 tglobaladdr:$dst)>, Requires<[NotNaCl]>; def : Pat<(X86call (i32 texternalsym:$dst)), - (CALLpcrel32 texternalsym:$dst)>; + (CALLpcrel32 texternalsym:$dst)>, Requires<[NotNaCl]>; def : Pat<(X86call (i32 imm:$dst)), - (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; + (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr, NotNaCl]>; // Comparisons. 
@@ -1484,19 +1484,19 @@ def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi))>; + sub_8bit_hi))>, Requires<[NotNaCl]>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 028f364976..097a762e15 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -109,7 +109,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst", [(brind GR32:$dst)], IIC_JMP_REG>, Requires<[In32BitMode]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", - [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode]>; + [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, Requires<[In32BitMode,NotNaCl]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>; @@ -123,7 +123,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { (ins i32imm:$off, i16imm:$seg), "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>; + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, Requires<[NotNaCl]>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), "ljmp{w}\t{*}$dst", [], IIC_JMP_FAR_MEM>, OpSize; @@ -152,10 +152,10 @@ let isCall = 1 in "call{l}\t$dst", [], IIC_CALL_RI>, Requires<[In32BitMode]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - Requires<[In32BitMode]>; + Requires<[In32BitMode,NotNaCl]>; // @LOCALMOD def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, - Requires<[In32BitMode]>; + Requires<[In32BitMode,NotNaCl]>; // @LOCALMOD def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), (ins i16imm:$off, i16imm:$seg), @@ -175,9 +175,20 @@ let isCall = 1 in let isAsmParserOnly = 1 in def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, (outs), (ins i16imm_pcrel:$dst, variable_ops), - "callw\t$dst", []>, OpSize; + "callw\t$dst", []>, OpSize, + Requires<[NotNaCl]>; // @LOCALMOD } +// @LOCALMOD-BEGIN +// These CodeGen patterns are normally part of the declaration above. +// However, we need to be able to disable these patterns for NaCl +// without disabling the instruction itself (so we can use the +// instruction in assembly input). +def : Pat<(X86call GR32:$dst), + (CALL32r GR32:$dst)>, Requires<[NotNaCl]>; +def : Pat<(X86call (loadi32 addr:$dst)), + (CALL32m addr:$dst)>, Requires<[NotNaCl]>; +// @LOCALMOD-END // Tail call stuff. @@ -202,7 +213,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>, Requires<[NotNaCl]>; // @LOCALMOD } @@ -220,18 +231,18 @@ let isCall = 1, Uses = [RSP] in { def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call{q}\t$dst", [], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; // @LOCALMOD def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops), "call{q}\t{*}$dst", [(X86call GR64:$dst)], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; // @LOCALMOD def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, NotNaCl]>; // @LOCALMOD def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; + "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>, Requires<[NotNaCl]>; // @LOCALMOD } let isCall = 1, isCodeGenOnly = 1 in @@ -266,5 +277,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops), - "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>, + Requires<[NotNaCl]>; // @LOCALMOD } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 38f190aa71..cbec891d7e 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -52,6 +52,7 @@ def MRM_DC : Format<52>; def MRM_DD : Format<53>; def MRM_DE : Format<54>; def MRM_DF : Format<55>; +def CustomFrm : Format<62>; // @LOCALMOD // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7d5b25084e..dabb181cce 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,12 +276,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } + // @LOCALMOD-BEGIN + unsigned NoForwardForNaCl = + tm.getSubtarget<X86Subtarget>().isTargetNaCl() ? 
TB_NO_FORWARD : 0; + // @LOCALMOD-END + static const X86OpTblEntry OpTbl0[] = { { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, - { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, - { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, @@ -308,8 +313,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, - { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, - { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, @@ -348,8 +353,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, - { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, - { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, @@ -2638,6 +2643,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) << " to " << RI.getName(DestReg) << '\n'); + MBB.dump(); llvm_unreachable("Cannot emit physreg copy instruction"); } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9ce61409da..14e132d7b3 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -208,6 +208,14 @@ def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// @LOCALMOD-BEGIN +def X86tlsaddr_le : SDNode<"X86ISD::TLSADDR_LE", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def X86tlsaddr_ie : SDNode<"X86ISD::TLSADDR_IE", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// @LOCALMOD-END + def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; @@ -477,6 +485,13 @@ def i64i8imm : Operand<i64> { let OperandType = "OPERAND_IMMEDIATE"; } +// @LOCALMOD +def lea64mem : Operand<i64> { + let PrintMethod = "printi64mem"; + let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + def lea64_32mem : Operand<i32> { let PrintMethod = "printi32mem"; let AsmOperandLowerMethod = "lower_lea64_32mem"; @@ -492,7 +507,8 @@ def lea64_32mem : Operand<i32> { // Define X86 specific addressing mode. 
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>; def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr", - [add, sub, mul, X86mul_imm, shl, or, frameindex], + [add, sub, mul, X86mul_imm, shl, or, frameindex, + X86WrapperRIP], // @LOCALMOD []>; def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; @@ -1611,6 +1627,12 @@ let Predicates = [HasBMI2] in { //===----------------------------------------------------------------------===// include "X86InstrArithmetic.td" + +//===----------------------------------------------------------------------===// +// NaCl support (@LOCALMOD) +//===----------------------------------------------------------------------===// + +include "X86InstrNaCl.td" include "X86InstrCMovSetCC.td" include "X86InstrExtension.td" include "X86InstrControl.td" @@ -1646,9 +1668,6 @@ include "X86InstrCompiler.td" // Assembler Mnemonic Aliases //===----------------------------------------------------------------------===// -def : MnemonicAlias<"call", "calll">, Requires<[In32BitMode]>; -def : MnemonicAlias<"call", "callq">, Requires<[In64BitMode]>; - def : MnemonicAlias<"cbw", "cbtw">; def : MnemonicAlias<"cwde", "cwtl">; def : MnemonicAlias<"cwd", "cwtd">; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0168d12231..68d67c5d3d 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -18,6 +18,8 @@ #include "X86TargetMachine.h" #include "llvm/Function.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h"//TODO(dschuff):don't forget to remove these +#include "llvm/Support/Disassembler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Valgrind.h" #include <cstdlib> @@ -82,7 +84,7 @@ static TargetJITInfo::JITCompilerFn JITCompilerFunction; // Provide a wrapper for X86CompilationCallback2 that saves non-traditional // callee saved registers, for the fastcc calling convention. extern "C" { -#if defined(X86_64_JIT) +#if defined(X86_64_JIT) && !defined(__native_client__) # ifndef _MSC_VER // No need to save EAX/EDX for X86-64. void X86CompilationCallback(void); @@ -230,7 +232,11 @@ extern "C" { "popl %ebp\n" CFI(".cfi_adjust_cfa_offset -4\n") CFI(".cfi_restore %ebp\n") +#if defined(__native_client__) // @LOCALMOD-BEGIN + "popl %ecx; nacljmp %ecx\n" +#else "ret\n" +#endif // @LOCALMOD-END CFI(".cfi_endproc\n") SIZE(X86CompilationCallback) ); @@ -295,7 +301,11 @@ extern "C" { "popl %ebp\n" CFI(".cfi_adjust_cfa_offset -4\n") CFI(".cfi_restore %ebp\n") +#if defined(__native_client__) // @LOCALMOD-BEGIN + "popl %ecx; nacljmp %ecx\n" +#else "ret\n" +#endif // @LOCALMOD-END CFI(".cfi_endproc\n") SIZE(X86CompilationCallback_SSE) ); @@ -469,7 +479,14 @@ TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { // The 32-bit stub contains a 5-byte call|jmp. // If the stub is a call to the compilation callback, an extra byte is added // to mark it as a stub. +#ifdef __native_client__ + // NaCl call targets must be bundle-aligned. 
In the case of stubs with + // CALLs, the calls do not need to be aligned to the end of the bundle + // because there is no return + StubLayout Result = {32, 32};//TODO(dschuff): use named constant here +#else StubLayout Result = {14, 4}; +#endif return Result; } @@ -498,6 +515,9 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, JCE.emitByte(0xE9); JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); #endif + DEBUG(dbgs() <<"emitted stub: "<< sys::disassembleBuffer( + (uint8_t *)Result,JCE.getCurrentPCValue()-(uintptr_t)Result, + (intptr_t)Result)); return Result; } @@ -519,6 +539,9 @@ void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, // initialize the buffer with garbage, which means it may follow a // noreturn function call, confusing X86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! + DEBUG(dbgs() <<"emitted stub: "<< sys::disassembleBuffer( + (uint8_t *)Result,JCE.getCurrentPCValue()-(uintptr_t)Result, + (intptr_t)Result)); return Result; } diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index df7507ce3d..933e6ce34c 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -691,7 +691,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // Emit the call. MCSymbol *PICBase = MF->getPICBaseSymbol(); - TmpInst.setOpcode(X86::CALLpcrel32); + // @LOCALMOD-BEGIN + // For NaCl, the call should be aligned to the end of a bundle. Since the + // call is at the end of the bundle, there should be no padding between + // the call and the next instruction (the label should still make sense). + TmpInst.setOpcode(getSubtarget().isTargetNaCl() ? + X86::NACL_CALL32d : X86::CALLpcrel32); + // @LOCALMOD-END // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. 
TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(PICBase, diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 3b727881c7..8519592370 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -353,6 +353,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } + // @LOCALMOD-START + const X86Subtarget& Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); + if (Subtarget.isTargetNaCl64()) { + Reserved.set(X86::R15); + Reserved.set(X86::R15D); + Reserved.set(X86::R15W); + Reserved.set(X86::R15B); + Reserved.set(X86::RBP); + Reserved.set(X86::EBP); + Reserved.set(X86::BP); + Reserved.set(X86::BPL); + } + // @LOCALMOD-END + return Reserved; } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index ae2d4d023e..ea6bb9df9b 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -268,6 +268,9 @@ let Namespace = "X86" in { // Pseudo index registers def EIZ : Register<"eiz">; def RIZ : Register<"riz">; + + def PSEUDO_NACL_SEG : Register<"nacl">; // @LOCALMOD + } @@ -335,6 +338,10 @@ def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>; def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>; def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>; def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; +// @LOCALMOD-START +def GR32_TC_64: RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, + R8D, R9D, R11D)>; +// @LOCALMOD-END def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 7c6788f578..f770178dbd 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -35,6 +35,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // TODO: Can we allow this optimization for Native Client? + // At the very least, pointer size needs to be fixed below. + return SDValue(); + } + // @LOCALMOD-END + // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); @@ -190,6 +198,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // TODO(pdox): Allow use of the NaCl pseudo-instruction for REP MOV + return SDValue(); + } + // @LOCALMOD-END + /// If not DWORD aligned, it is more efficient to call the library. 
However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 5be3126728..ecd8e4405f 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -160,7 +160,11 @@ const char *X86Subtarget::getBZeroEntry() const { bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { if (In64BitMode) return false; - return isTargetELF() || TM.getRelocationModel() == Reloc::Static; + // @LOCALMOD-BEGIN + // Upstream LLVM bug fix + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2367 + return isTargetELF() && TM.getRelocationModel() == Reloc::Static; + // @LOCALMOD-END } /// getSpecialAddressLatency - For targets where it is beneficial to @@ -412,12 +416,14 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, assert((!In64BitMode || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both - // 32 and 64 bit) and for all 64-bit targets. + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux, Solaris (both + // 32 and 64 bit), NaCl and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || - isTargetSolaris() || In64BitMode) + isTargetSolaris() || + isTargetNaCl() || // @LOCALMOD + In64BitMode) stackAlignment = 16; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 1af585f2ad..80bb4d6fca 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -185,6 +185,9 @@ public: bool is64Bit() const { return In64BitMode; } + // @LOCALMOD + bool has64BitPointers() const { return is64Bit() && !isTargetNaCl(); } + PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index c066a5603d..6d05a91a32 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -43,6 +43,8 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, getSubtargetImpl()->isTargetWindows()) ? "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-" "n8:16:32-S32" : + getSubtargetImpl()->isTargetNaCl() ? // @LOCALMOD + "e-p:32:32-s:32-f64:64:64-f32:32:32-f80:128:128-i64:64:64-n8:16:32-S128" : "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" "n8:16:32-S128"), InstrInfo(*this), @@ -59,7 +61,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), - DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + DataLayout(getSubtargetImpl()->isTargetNaCl() ? 
// @LOCALMOD + "e-p:32:32-s:64-f64:64:64-f32:32:32-f80:128:128-i64:64:64-" + "n8:16:32:64-S128" : + "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" "n8:16:32:64-S128"), InstrInfo(*this), TSInfo(*this), @@ -180,11 +185,25 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } + // @LOCALMOD-START + if (getX86Subtarget().isTargetNaCl()) { + PM->add(createX86NaClRewritePass()); + ShouldPrint = true; + } + // @LOCALMOD-END + return ShouldPrint; } bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { + // @LOCALMOD-START + // Add this pass here instead of as a PreEmitPass because this function is + // only called in JIT mode + if (Subtarget.isTargetNaCl()) { + PM.add(createX86NaClRewriteFinalPass()); + } + // @LOCALMOD-END PM.add(createX86JITCodeEmitterPass(*this, JCE)); return false; diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 8e935af67f..d4552d8136 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -20,6 +20,9 @@ #include "X86ISelLowering.h" #include "X86FrameLowering.h" #include "X86JITInfo.h" +#ifdef __native_client__ +#include "X86NaClJITInfo.h" +#endif #include "X86SelectionDAGInfo.h" #include "X86Subtarget.h" #include "llvm/Target/TargetMachine.h" @@ -84,7 +87,11 @@ class X86_32TargetMachine : public X86TargetMachine { X86InstrInfo InstrInfo; X86SelectionDAGInfo TSInfo; X86TargetLowering TLInfo; +#ifdef __native_client__ + X86NaClJITInfo JITInfo; +#else X86JITInfo JITInfo; +#endif public: X86_32TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 718f35ea84..32bfba96bb 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -9,9 +9,11 @@ #include "X86TargetObjectFile.h" #include "X86TargetMachine.h" +#include "X86Subtarget.h" // @LOCALMOD #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" // @LOCALMOD #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/Mangler.h" #include "llvm/Support/Dwarf.h" @@ -42,3 +44,30 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI) const { return Mang->getSymbol(GV); } + +// @LOCALMOD-START +// NOTE: this was largely lifted from +// lib/Target/ARM/ARMTargetObjectFile.cpp +// +// The default is .ctors/.dtors while the arm backend uses +// .init_array/.fini_array +// +// Without this the linker defined symbols __fini_array_start and +// __fini_array_end do not have useful values. 
c.f.: +// http://code.google.com/p/nativeclient/issues/detail?id=805 +void TargetLoweringObjectFileNaCl::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); +} +// @LOCALMOD-END diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index a02a36809e..34c1234eae 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -32,6 +32,13 @@ namespace llvm { MachineModuleInfo *MMI) const; }; + // @LOCALMOD-BEGIN + class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF { + public: + virtual void Initialize(MCContext &ctx, const TargetMachine &TM); + }; + // @LOCALMOD-END + } // end namespace llvm #endif diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index d9911bfb45..51d0a002e1 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -58,6 +58,15 @@ namespace { continue; if (I->getName() == "llvm.global_ctors") continue; + // @LOCALMOD-BEGIN - this is likely upstreamable + // Note: there will likely be more cases once this + // is exercises more thorougly. + if (I->getName() == "llvm.global_dtors") + continue; + // not observed yet + if (I->hasExternalWeakLinkage()) + continue; + // @LOCALMOD-END } if (I->hasLocalLinkage()) @@ -72,8 +81,15 @@ namespace { } else { if (I->hasAvailableExternallyLinkage()) continue; - } - + // @LOCALMOD-BEGIN - this is likely upstreamable + // Note: there will likely be more cases once this + // is exercises more thorougly. + // observed for pthread_cancel + if (I->hasExternalWeakLinkage()) + continue; + // @LOCALMOD-END + } + if (I->hasLocalLinkage()) I->setVisibility(GlobalValue::HiddenVisibility); I->setLinkage(GlobalValue::ExternalLinkage); diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index 8b1df92fa2..fb6fa26f11 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -10,6 +10,10 @@ LEVEL = ../.. PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello +ifeq ($(NACL_SANDBOX),1) + PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) +endif + include $(LEVEL)/Makefile.config # No support for plugins on windows targets diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp index 4254fb29e8..726ea5bbb0 100644 --- a/lib/VMCore/Globals.cpp +++ b/lib/VMCore/Globals.cpp @@ -45,6 +45,116 @@ void GlobalValue::destroyConstant() { llvm_unreachable("You can't GV->destroyConstant()!"); } +// @LOCALMOD-BEGIN + +// Extract the version information from GV. 
+static void ExtractVersion(const GlobalValue *GV, + StringRef *Name, + StringRef *Ver, + bool *IsDefault) { + // The version information is stored in the GlobalValue's name, e.g.: + // + // GV Name Name Ver IsDefault + // ------------------------------------ + // foo@@V1 --> foo V1 true + // bar@V2 --> bar V2 false + // baz --> baz false + + StringRef GVName = GV->getName(); + size_t atpos = GVName.find("@"); + if (atpos == StringRef::npos) { + *Name = GVName; + *Ver = ""; + *IsDefault = false; + return; + } + *Name = GVName.substr(0, atpos); + ++atpos; + if (atpos < GVName.size() && GVName[atpos] == '@') { + *IsDefault = true; + ++atpos; + } else { + *IsDefault = false; + } + *Ver = GVName.substr(atpos); +} + +// Set the version information on GV. +static void SetVersion(Module *M, + GlobalValue *GV, + StringRef Ver, + bool IsDefault) { + StringRef Name; + StringRef PrevVersion; + bool PrevIsDefault; + ExtractVersion(GV, &Name, &PrevVersion, &PrevIsDefault); + + // If this symbol already has a version, make sure it matches. + if (!PrevVersion.empty()) { + if (!PrevVersion.equals(Ver) || PrevIsDefault != IsDefault) { + llvm_unreachable("Trying to override symbol version info!"); + } + return; + } + // If there's no version to set, there's nothing to do. + if (Ver.empty()) + return; + + // Make sure the versioned symbol name doesn't already exist. + std::string NewName = Name.str() + (IsDefault ? "@@" : "@") + Ver.str(); + if (M->getNamedValue(NewName)) { + // It may make sense to do this as long as one of the globals being + // merged is only a declaration. But since this situation seems to be + // a corner case, for now it is unimplemented. + llvm_unreachable("Merging unversioned global into " + "existing versioned global is unimplemented"); + } + GV->setName(NewName); +} + +StringRef GlobalValue::getUnversionedName() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + return Name; +} + +StringRef GlobalValue::getVersion() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + return Ver; +} + +bool GlobalValue::isDefaultVersion() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + // It is an error to call this function on an unversioned symbol. + assert(!Ver.empty()); + return IsDefaultVersion; +} + +void GlobalValue::setVersionDef(StringRef Version, bool IsDefault) { + // This call only makes sense for definitions. + assert(!isDeclaration()); + SetVersion(Parent, this, Version, IsDefault); +} + +void GlobalValue::setNeeded(StringRef Version, StringRef DynFile) { + // This call makes sense on declarations or + // available-externally definitions. + // TODO(pdox): If this is a definition, should we turn it + // into a declaration here? + assert(isDeclaration() || hasAvailableExternallyLinkage()); + SetVersion(Parent, this, Version, false); + Parent->addNeededRecord(DynFile, this); +} +// @LOCALMOD-END + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalValue) from the GlobalValue Src to this one. 
void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp index 8ea36659b8..3799ed6c0b 100644 --- a/lib/VMCore/Module.cpp +++ b/lib/VMCore/Module.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/ErrorHandling.h" // @LOCALMOD #include "SymbolTableListTraitsImpl.h" #include <algorithm> #include <cstdarg> @@ -607,3 +608,180 @@ void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes, bool OnlyNamed) const { TypeFinder(StructTypes, OnlyNamed).run(*this); } + +// @LOCALMOD-BEGIN +// TODO(pdox): +// If possible, use actual bitcode records instead of NamedMetadata. +// This is contingent upon whether we can get these changes upstreamed +// immediately, to avoid creating incompatibilities in the bitcode format. + +static std::string +ModuleMetaGet(const Module *module, StringRef MetaName) { + NamedMDNode *node = module->getNamedMetadata(MetaName); + if (node == NULL) + return ""; + assert(node->getNumOperands() == 1); + MDNode *subnode = node->getOperand(0); + assert(subnode->getNumOperands() == 1); + MDString *value = dyn_cast<MDString>(subnode->getOperand(0)); + assert(value != NULL); + return value->getString(); +} + +static void +ModuleMetaSet(Module *module, StringRef MetaName, StringRef ValueStr) { + NamedMDNode *node = module->getNamedMetadata(MetaName); + if (node) + module->eraseNamedMetadata(node); + node = module->getOrInsertNamedMetadata(MetaName); + MDString *value = MDString::get(module->getContext(), ValueStr); + node->addOperand(MDNode::get(module->getContext(), + makeArrayRef(static_cast<Value*>(value)))); +} + +const std::string &Module::getSOName() const { + if (ModuleSOName == "") + ModuleSOName.assign(ModuleMetaGet(this, "SOName")); + return ModuleSOName; +} + +void Module::setSOName(StringRef Name) { + ModuleMetaSet(this, "SOName", Name); + ModuleSOName = Name; +} + +void Module::setOutputFormat(Module::OutputFormat F) { + const char *formatStr; + switch (F) { + case ObjectOutputFormat: formatStr = "object"; break; + case SharedOutputFormat: formatStr = "shared"; break; + case ExecutableOutputFormat: formatStr = "executable"; break; + default: + llvm_unreachable("Unrecognized output format in setOutputFormat()"); + } + ModuleMetaSet(this, "OutputFormat", formatStr); +} + +Module::OutputFormat Module::getOutputFormat() const { + std::string formatStr = ModuleMetaGet(this, "OutputFormat"); + if (formatStr == "" || formatStr == "object") + return ObjectOutputFormat; + else if (formatStr == "shared") + return SharedOutputFormat; + else if (formatStr == "executable") + return ExecutableOutputFormat; + llvm_unreachable("Invalid module compile type in getOutputFormat()"); +} + +void +Module::wrapSymbol(StringRef symName) { + std::string wrapSymName("__wrap_"); + wrapSymName += symName; + + std::string realSymName("__real_"); + realSymName += symName; + + GlobalValue *SymGV = getNamedValue(symName); + GlobalValue *WrapGV = getNamedValue(wrapSymName); + GlobalValue *RealGV = getNamedValue(realSymName); + + // Replace uses of "sym" with __wrap_sym. + if (SymGV) { + if (!WrapGV) + WrapGV = cast<GlobalValue>(getOrInsertGlobal(wrapSymName, + SymGV->getType())); + SymGV->replaceAllUsesWith(ConstantExpr::getBitCast(WrapGV, + SymGV->getType())); + } + + // Replace uses of "__real_sym" with "sym". 
+ if (RealGV) { + if (!SymGV) + SymGV = cast<GlobalValue>(getOrInsertGlobal(symName, RealGV->getType())); + RealGV->replaceAllUsesWith(ConstantExpr::getBitCast(SymGV, + RealGV->getType())); + } +} + +// The metadata key prefix for NeededRecords. +static const char *NeededPrefix = "NeededRecord_"; + +void +Module::dumpMeta(raw_ostream &OS) const { + OS << "OutputFormat: "; + switch (getOutputFormat()) { + case Module::ObjectOutputFormat: OS << "object"; break; + case Module::SharedOutputFormat: OS << "shared"; break; + case Module::ExecutableOutputFormat: OS << "executable"; break; + } + OS << "\n"; + OS << "SOName: " << getSOName() << "\n"; + for (Module::lib_iterator L = lib_begin(), + E = lib_end(); + L != E; ++L) { + OS << "NeedsLibrary: " << (*L) << "\n"; + } + std::vector<NeededRecord> NList; + getNeededRecords(&NList); + for (unsigned i = 0; i < NList.size(); ++i) { + const NeededRecord &NR = NList[i]; + OS << StringRef(NeededPrefix) << NR.DynFile << ": "; + for (unsigned j = 0; j < NR.Symbols.size(); ++j) { + if (j != 0) + OS << " "; + OS << NR.Symbols[j]; + } + OS << "\n"; + } +} + +void Module::addNeededRecord(StringRef DynFile, GlobalValue *GV) { + if (DynFile.empty()) { + // We never resolved this symbol, even after linking. + // This should only happen in a shared object. + // It is safe to ignore this symbol, and let the dynamic loader + // figure out where it comes from. + return; + } + std::string Key = NeededPrefix; + Key += DynFile; + // Get the node for this file. + NamedMDNode *Node = getOrInsertNamedMetadata(Key); + // Add this global value's name to the list. + MDString *value = MDString::get(getContext(), GV->getName()); + Node->addOperand(MDNode::get(getContext(), + makeArrayRef(static_cast<Value*>(value)))); +} + +// Get the NeededRecord for SOName. +// Returns an empty NeededRecord if there was no metadata found. +static void getNeededRecordFor(const Module *M, + StringRef SOName, + Module::NeededRecord *NR) { + NR->DynFile = SOName; + NR->Symbols.clear(); + + std::string Key = NeededPrefix; + Key += SOName; + NamedMDNode *Node = M->getNamedMetadata(Key); + if (!Node) + return; + + for (unsigned k = 0; k < Node->getNumOperands(); ++k) { + // Insert the symbol name. + const MDString *SymName = + dyn_cast<MDString>(Node->getOperand(k)->getOperand(0)); + NR->Symbols.push_back(SymName->getString()); + } +} + +// Place the complete list of needed records in NeededOut. +void Module::getNeededRecords(std::vector<NeededRecord> *NeededOut) const { + // Iterate through the libraries needed, grabbing each NeededRecord. + for (lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) { + NeededRecord NR; + getNeededRecordFor(this, *I, &NR); + NeededOut->push_back(NR); + } +} +// @LOCALMOD-END diff --git a/projects/sample/autoconf/config.sub b/projects/sample/autoconf/config.sub index 9942491533..8f5793aef3 100755 --- a/projects/sample/autoconf/config.sub +++ b/projects/sample/autoconf/config.sub @@ -132,6 +132,10 @@ case $maybe_os in os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; + nacl) + os=-nacl + basic_machine=pnacl-unknown + ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] @@ -347,6 +351,8 @@ case $basic_machine in i*86 | x86_64) basic_machine=$basic_machine-pc ;; + pnacl-*) + ;; # Object if more than one company name word. 
*-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 @@ -1364,6 +1370,8 @@ case $os in ;; esac ;; + -nacl) + ;; -nto-qnx*) ;; -nto*) diff --git a/projects/sample/configure b/projects/sample/configure index d92588815a..cb1bb0b2bb 100755 --- a/projects/sample/configure +++ b/projects/sample/configure @@ -3686,6 +3686,11 @@ else llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="GNU" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="NativeClient" + llvm_cv_platform_type="Unix" ;; *-*-solaris*) llvm_cv_link_all_option="-Wl,-z,allextract" llvm_cv_no_link_all_option="-Wl,-z,defaultextract" diff --git a/tools/Makefile b/tools/Makefile index 2b4b9b7878..901d3f35e4 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -34,7 +34,7 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \ bugpoint llvm-bcanalyzer \ llvm-diff macho-dump llvm-objdump llvm-readobj \ llvm-rtdyld llvm-dwarfdump llvm-cov \ - llvm-size llvm-stress + llvm-size llvm-stress bc-wrap pso-stub # Let users override the set of tools to build from the command line. ifdef ONLY_TOOLS diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 9c17da6a4c..1c99730d45 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -55,6 +55,25 @@ namespace { ld_plugin_set_extra_library_path set_extra_library_path = NULL; ld_plugin_get_view get_view = NULL; ld_plugin_message message = discard_message; + // @LOCALMOD-BEGIN + // REL, DYN, or EXEC + ld_plugin_output_file_type linker_output; + + // Callback for getting link soname from gold + ld_plugin_get_output_soname get_output_soname = NULL; + + // Callback for getting needed libraries from gold + ld_plugin_get_needed get_needed = NULL; + + // Callback for getting number of needed library from gold + ld_plugin_get_num_needed get_num_needed = NULL; + + // Callback for getting the number of --wrap'd symbols. + ld_plugin_get_num_wrapped get_num_wrapped = NULL; + + // Callback for getting the name of a wrapped symbol. + ld_plugin_get_wrapped get_wrapped = NULL; + // @LOCALMOD-END int api_version = 0; int gold_version = 0; @@ -62,11 +81,17 @@ namespace { struct claimed_file { void *handle; std::vector<ld_plugin_symbol> syms; + bool is_linked_in; // @LOCALMOD }; lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; std::string output_name = ""; std::list<claimed_file> Modules; + + // @LOCALMOD-BEGIN + std::vector<std::string> DepLibs; + // @LOCALMOD-END + std::vector<sys::Path> Cleanup; lto_code_gen_t code_gen = NULL; } @@ -123,13 +148,25 @@ namespace options { } } +// @LOCALMOD-BEGIN +static const char *get_basename(const char *path) { + if (path == NULL) + return NULL; + const char *slash = strrchr(path, '/'); + if (slash) + return slash + 1; + + return path; +} +// @LOCALMOD-END + static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, int *claimed); static ld_plugin_status all_symbols_read_hook(void); static ld_plugin_status cleanup_hook(void); -extern "C" ld_plugin_status onload(ld_plugin_tv *tv); -ld_plugin_status onload(ld_plugin_tv *tv) { +extern "C" ld_plugin_status llvm_plugin_onload(ld_plugin_tv *tv); // @LOCALMOD +ld_plugin_status llvm_plugin_onload(ld_plugin_tv *tv) { // @LOCALMOD // We're given a pointer to the first transfer vector. We read through them // until we find one where tv_tag == LDPT_NULL. 
The REGISTER_* tagged values // contain pointers to functions that we need to call to register our own @@ -150,6 +187,10 @@ ld_plugin_status onload(ld_plugin_tv *tv) { output_name = tv->tv_u.tv_string; break; case LDPT_LINKER_OUTPUT: + // @LOCALMOD-BEGIN + linker_output = + static_cast<ld_plugin_output_file_type>(tv->tv_u.tv_val); + // @LOCALMOD-END switch (tv->tv_u.tv_val) { case LDPO_REL: // .o case LDPO_DYN: // .so @@ -213,7 +254,23 @@ ld_plugin_status onload(ld_plugin_tv *tv) { break; case LDPT_GET_VIEW: get_view = tv->tv_u.tv_get_view; + // @LOCALMOD-BEGIN + case LDPT_GET_OUTPUT_SONAME: + get_output_soname = tv->tv_u.tv_get_output_soname; + break; + case LDPT_GET_NEEDED: + get_needed = tv->tv_u.tv_get_needed; + break; + case LDPT_GET_NUM_NEEDED: + get_num_needed = tv->tv_u.tv_get_num_needed; + break; + case LDPT_GET_WRAPPED: + get_wrapped = tv->tv_u.tv_get_wrapped; break; + case LDPT_GET_NUM_WRAPPED: + get_num_wrapped = tv->tv_u.tv_get_num_wrapped; + break; + // @LOCALMOD-END case LDPT_MESSAGE: message = tv->tv_u.tv_message; break; @@ -231,6 +288,24 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } + // @LOCALMOD-BEGIN + // Parse extra command-line options + // Although lto_codegen provides a way to parse command-line arguments, + // we need the arguments to be parsed and applied before LTOModules are + // even created. In particular, this is needed because the + // "-add-nacl-read-tp-dependency" flag affects how modules are created. + if (!options::extra.empty()) { + for (std::vector<std::string>::iterator it = options::extra.begin(); + it != options::extra.end(); ++it) { + lto_add_command_line_option((*it).c_str()); + } + lto_parse_command_line_options(); + // We clear the options so that they don't get parsed again in + // lto_codegen_debug_options. + options::extra.clear(); + } + // @LOCALMOD-END + return LDPS_OK; } @@ -297,7 +372,21 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, ld_plugin_symbol &sym = cf.syms.back(); sym.name = const_cast<char *>(lto_module_get_symbol_name(M, i)); sym.name = strdup(sym.name); + // @LOCALMOD-BEGIN + // Localmods have disabled the use of the 'version' field for passing + // version information to Gold. Instead, the version is now transmitted as + // part of the 'name' field, which has the form "sym@VER" or "sym@@VER". + // This is nicer because it communicates one extra bit of information (@@ + // marks the default version), and allows us to access the real symbol + // name in all_symbols_read. + + // These fields are set by Gold to communicate the updated version info + // to the plugin. They are used in all_symbols_read_hook(). + // Initialize them for predictability. 
sym.version = NULL; + sym.is_default = false; + sym.dynfile = NULL; + // @LOCALMOD-END int scope = attrs & LTO_SYMBOL_SCOPE_MASK; switch (scope) { @@ -346,16 +435,37 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } cf.syms.reserve(cf.syms.size()); + // @LOCALMOD-BEGIN + bool is_shared = + (lto_module_get_output_format(M) == LTO_OUTPUT_FORMAT_SHARED); + const char* soname = lto_module_get_soname(M); + if (soname[0] == '\0') + soname = NULL; + // @LOCALMOD-END if (!cf.syms.empty()) { - if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0]) != LDPS_OK) { + if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0], + is_shared, soname) != LDPS_OK) { // @LOCALMOD (*message)(LDPL_ERROR, "Unable to add symbols!"); return LDPS_ERR; } } - if (code_gen) - lto_codegen_add_module(code_gen, M); + // @LOCALMOD-BEGIN + // Do not merge the module if it's a PSO. + // If the PSO's soname is set, add it to DepLibs. + cf.is_linked_in = false; + if (code_gen) { + if (is_shared) { + if (soname && strlen(soname) > 0) { + DepLibs.push_back(soname); + } + } else { + lto_codegen_add_module(code_gen, M); + cf.is_linked_in = true; + } + } + // @LOCALMOD-END lto_module_dispose(M); @@ -387,13 +497,46 @@ static ld_plugin_status all_symbols_read_hook(void) { continue; (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]); for (unsigned i = 0, e = I->syms.size(); i != e; i++) { + // @LOCALMOD-BEGIN + // Don't process the symbols inside a dynamic object. + if (!I->is_linked_in) + continue; + // @LOCALMOD-END + if (I->syms[i].resolution == LDPR_PREVAILING_DEF) { + // @LOCALMOD-BEGIN + // Set the symbol version in the module. + if (linker_output != LDPO_REL && I->syms[i].version) { + // NOTE: This may change the name of the symbol, so it must happen + // before the call to lto_codegen_add_must_preserve_symbols() below. + I->syms[i].name = const_cast<char *>( + lto_codegen_set_symbol_def_version(code_gen, I->syms[i].name, + I->syms[i].version, + I->syms[i].is_default)); + } lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); + // @LOCALMOD-END anySymbolsPreserved = true; if (options::generate_api_file) api_file << I->syms[i].name << "\n"; } + // @LOCALMOD-BEGIN + else if (linker_output != LDPO_REL && + (I->syms[i].resolution == LDPR_RESOLVED_DYN || + I->syms[i].resolution == LDPR_UNDEF)) { + // This symbol is provided by an external object. + // Set the version and source dynamic file for it. + const char *ver = I->syms[i].version; + const char *dynfile = I->syms[i].dynfile; + dynfile = get_basename(dynfile); + // NOTE: This may change the name of the symbol. + I->syms[i].name = const_cast<char *>( + lto_codegen_set_symbol_needed(code_gen, I->syms[i].name, + ver ? ver : "", + dynfile ? dynfile : "")); + } + // @LOCALMOD-END } } @@ -411,6 +554,11 @@ static ld_plugin_status all_symbols_read_hook(void) { if (!options::mcpu.empty()) lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); + // @LOCALMOD-BEGIN (COMMENT) + // "extra" will always be empty below, because we process the extra + // options earlier, at the end of onload(). + // @LOCALMOD-END + // Pass through extra options to the code generator. if (!options::extra.empty()) { for (std::vector<std::string>::iterator it = options::extra.begin(); @@ -419,6 +567,57 @@ static ld_plugin_status all_symbols_read_hook(void) { } } + // @LOCALMOD-BEGIN + // Store the linker output format into the bitcode. 
+ lto_output_format format; + switch (linker_output) { + case LDPO_REL: + format = LTO_OUTPUT_FORMAT_OBJECT; + break; + case LDPO_DYN: + format = LTO_OUTPUT_FORMAT_SHARED; + break; + case LDPO_EXEC: + format = LTO_OUTPUT_FORMAT_EXEC; + break; + default: + (*message)(LDPL_FATAL, "Unknown linker output format (gold-plugin)"); + abort(); + break; + } + lto_codegen_set_merged_module_output_format(code_gen, format); + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // For -shared linking, store the soname into the bitcode. + if (linker_output == LDPO_DYN) { + const char *soname = (*get_output_soname)(); + lto_codegen_set_merged_module_soname(code_gen, soname); + } + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // Add the needed libraries to the bitcode. + unsigned int num_needed = (*get_num_needed)(); + for (unsigned i=0; i < num_needed; ++i) { + const char *soname = (*get_needed)(i); + soname = get_basename(soname); + lto_codegen_add_merged_module_library_dep(code_gen, soname); + } + for (std::vector<std::string>::iterator I = DepLibs.begin(), + E = DepLibs.end(); I != E; ++I) { + lto_codegen_add_merged_module_library_dep(code_gen, I->c_str()); + } + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // Perform symbol wrapping. + unsigned int num_wrapped = (*get_num_wrapped)(); + for (unsigned i=0; i < num_wrapped; ++i) { + const char *sym = (*get_wrapped)(i); + lto_codegen_wrap_symbol_in_merged_module(code_gen, sym); + } + // @LOCALMOD-END if (options::generate_bc_file != options::BC_NO) { std::string path; if (options::generate_bc_file == options::BC_ONLY) diff --git a/tools/gold/gold.exports b/tools/gold/gold.exports index 277a33a1ec..89d8640427 100644 --- a/tools/gold/gold.exports +++ b/tools/gold/gold.exports @@ -1 +1 @@ -onload +llvm_plugin_onload diff --git a/tools/llc/CMakeLists.txt b/tools/llc/CMakeLists.txt index 683f29862d..9c695bcdea 100644 --- a/tools/llc/CMakeLists.txt +++ b/tools/llc/CMakeLists.txt @@ -1,5 +1,11 @@ set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser) add_llvm_tool(llc +# LOCALMOD BEGIN +# This file provides wrappers to lseek(2), read(2), etc. + nacl_file.cpp + StubMaker.cpp + TextStubWriter.cpp +# LOCALMOD END llc.cpp ) diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index a99d0b787a..f87978ec43 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -18,7 +18,9 @@ #include "llvm/PassManager.h" #include "llvm/Pass.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/DataStream.h" #include "llvm/Support/IRReader.h" +#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD #include "llvm/CodeGen/LinkAllAsmWriterComponents.h" #include "llvm/CodeGen/LinkAllCodegenComponents.h" #include "llvm/MC/SubtargetFeature.h" @@ -26,7 +28,9 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/ManagedStatic.h" +#if !defined(__native_client__) #include "llvm/Support/PluginLoader.h" +#endif #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/Host.h" @@ -36,8 +40,33 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include <memory> + +// @LOCALMOD-BEGIN +#include "StubMaker.h" +#include "TextStubWriter.h" +// @LOCALMOD-END + using namespace llvm; +// @LOCALMOD-BEGIN +// NOTE: this tool can be built as a "sandboxed" translator. 
+// There are two ways to build the translator +// SRPC-style: no file operations are allowed +// see nacl_file.cc for support code +// non-SRPC-style: some basic file operations are allowed +// This can be useful for debugging but will +// not be deployed. +#if defined(__native_client__) && defined(NACL_SRPC) +MemoryBuffer* NaClGetMemoryBufferForFile(const char* filename); +void NaClOutputStringToFile(const char* filename, const std::string& data); +// The following two functions communicate metadata to the SRPC wrapper for LLC. +void NaClRecordObjectInformation(bool is_shared, const std::string& soname); +void NaClRecordSharedLibraryDependency(const std::string& library_name); +DataStreamer* NaClBitcodeStreamer; +#endif +// @LOCALMOD-END + + // General options for llc. Other pass-specific options are specified // within the corresponding llc passes, and target-specific options // and back-end code generation options are specified with the target machine. @@ -48,6 +77,12 @@ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); static cl::opt<std::string> OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); +// @LOCALMOD-BEGIN +static cl::opt<std::string> +MetadataTextFilename("metadata-text", cl::desc("Metadata as text, out filename"), + cl::value_desc("filename")); +// @LOCALMOD-END + // Determine optimization level. static cl::opt<char> OptLevel("O", @@ -244,6 +279,26 @@ SegmentedStacks("segmented-stacks", cl::desc("Use segmented stacks if possible."), cl::init(false)); +// @LOCALMOD-BEGIN +// Using bitcode streaming has a couple of ramifications. Primarily it means +// that the module in the file will be compiled one function at a time rather +// than the whole module. This allows earlier functions to be compiled before +// later functions are read from the bitcode but of course means no whole-module +// optimizations. For now, streaming is only supported for files and stdin. +static cl::opt<bool> +LazyBitcode("streaming-bitcode", + cl::desc("Use lazy bitcode streaming for file inputs"), + cl::init(false)); + +// The option below overlaps very much with bitcode streaming. +// We keep it separate because it is still experimental and we want +// to use it without changing the outside behavior which is especially +// relevant for the sandboxed case. +static cl::opt<bool> +ReduceMemoryFootprint("reduce-memory-footprint", + cl::desc("Aggressively reduce memory used by llc"), + cl::init(false)); +// @LOCALMOD-END // GetFileNameRoot - Helper function to get the basename of a filename. static inline std::string @@ -323,9 +378,60 @@ static tool_output_file *GetOutputStream(const char *TargetName, return FDOut; } +// @LOCALMOD-BEGIN +#if defined(__native_client__) && defined(NACL_SRPC) +void RecordMetadataForSrpc(const Module &mod) { + bool is_shared = (mod.getOutputFormat() == Module::SharedOutputFormat); + std::string soname = mod.getSOName(); + NaClRecordObjectInformation(is_shared, soname); + for (Module::lib_iterator L = mod.lib_begin(), + E = mod.lib_end(); + L != E; ++L) { + NaClRecordSharedLibraryDependency(*L); + } +} +#endif // defined(__native_client__) && defined(NACL_SRPC) +// @LOCALMOD-END + + +// @LOCALMOD-BEGIN + +// Write the ELF Stubs to the metadata file, in text format +// Returns 0 on success, non-zero on error. +int WriteTextMetadataFile(const Module &M, const Triple &TheTriple) { + // Build the ELF stubs (in high level format) + SmallVector<ELFStub*, 8> StubList; + // NOTE: The triple is unnecessary for the text version. 
+ MakeAllStubs(M, TheTriple, &StubList); + // For each stub, write the ELF object to the metadata file. + std::string s; + for (unsigned i = 0; i < StubList.size(); i++) { + WriteTextELFStub(StubList[i], &s); + } + FreeStubList(&StubList); + +#if defined(__native_client__) && defined(NACL_SRPC) + NaClOutputStringToFile(MetadataTextFilename.c_str(), s); +#else + std::string error; + OwningPtr<tool_output_file> MOut( + new tool_output_file(MetadataTextFilename.c_str(), error, + raw_fd_ostream::F_Binary)); + if (!error.empty()) { + errs() << error << '\n'; + return 1; + } + MOut->os().write(s.data(), s.size()); + MOut->keep(); +#endif + return 0; +} + +// @LOCALMOD-END + // main - Entry point for the llc compiler. // -int main(int argc, char **argv) { +int llc_main(int argc, char **argv) { sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); @@ -350,13 +456,64 @@ int main(int argc, char **argv) { SMDiagnostic Err; std::auto_ptr<Module> M; - M.reset(ParseIRFile(InputFilename, Err, Context)); + // @LOCALMOD-BEGIN +#if defined(__native_client__) && defined(NACL_SRPC) + if (LazyBitcode) { + std::string StrError; + M.reset(getStreamedBitcodeModule(std::string("<SRPC stream>"), + NaClBitcodeStreamer, Context, &StrError)); + if (!StrError.empty()) { + Err = SMDiagnostic(InputFilename, SourceMgr::DK_Error, StrError); + } + } else { + // In the NACL_SRPC case, fake a memory mapped file + // TODO(jvoung): revert changes in MemoryBuffer.cpp, no longer needed + M.reset(ParseIR(NaClGetMemoryBufferForFile(InputFilename.c_str()), + Err, + Context)); + M->setModuleIdentifier(InputFilename); + } +#else + if (LazyBitcode) { + std::string StrError; + DataStreamer *streamer = getDataFileStreamer(InputFilename, &StrError); + if (streamer) { + M.reset(getStreamedBitcodeModule(InputFilename, streamer, Context, + &StrError)); + } + if (!StrError.empty()) { + Err = SMDiagnostic(InputFilename, SourceMgr::DK_Error, StrError); + } + } else { + M.reset(ParseIRFile(InputFilename, Err, Context)); + } +#endif + // @LOCALMOD-END + if (M.get() == 0) { Err.print(argv[0], errs()); return 1; } Module &mod = *M.get(); + // @LOCALMOD-BEGIN +#if defined(__native_client__) && defined(NACL_SRPC) + RecordMetadataForSrpc(mod); + + // To determine if we should compile PIC or not, we needed to load at + // least the metadata. Since we've already constructed the commandline, + // we have to hack this in after commandline processing. + if (mod.getOutputFormat() == Module::SharedOutputFormat) { + RelocModel = Reloc::PIC_; + } + // Also set PIC_ for dynamic executables: + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2351 + if (mod.lib_size() > 0) { + RelocModel = Reloc::PIC_; + } +#endif // defined(__native_client__) && defined(NACL_SRPC) + // @LOCALMOD-END + // If we are supposed to override the target triple, do so now. if (!TargetTriple.empty()) mod.setTargetTriple(Triple::normalize(TargetTriple)); @@ -379,6 +536,11 @@ int main(int argc, char **argv) { std::string FeaturesStr; if (MAttrs.size()) { SubtargetFeatures Features; + // @LOCALMOD-BEGIN + // Use the same default attribute settings as libLTO. + // TODO(pdox): Figure out why this isn't done for upstream llc. 
+ Features.getDefaultSubtargetFeatures(TheTriple); + // @LOCALMOD-END for (unsigned i = 0; i != MAttrs.size(); ++i) Features.AddFeature(MAttrs[i]); FeaturesStr = Features.getString(); @@ -443,19 +605,27 @@ int main(int argc, char **argv) { TheTriple.isMacOSXVersionLT(10, 6)) Target.setMCUseLoc(false); +#if !defined(NACL_SRPC) // Figure out where we are going to send the output... OwningPtr<tool_output_file> Out (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0])); if (!Out) return 1; - +#endif + // Build up all of the passes that we want to do to the module. - PassManager PM; + // @LOCALMOD-BEGIN + OwningPtr<PassManagerBase> PM; + if (LazyBitcode || ReduceMemoryFootprint) + PM.reset(new FunctionPassManager(&mod)); + else + PM.reset(new PassManager()); + // @LOCALMOD-END // Add the target data from the target machine, if it exists, or the module. if (const TargetData *TD = Target.getTargetData()) - PM.add(new TargetData(*TD)); + PM->add(new TargetData(*TD)); else - PM.add(new TargetData(&mod)); + PM->add(new TargetData(&mod)); // Override default to generate verbose assembly. Target.setAsmVerbosityDefault(true); @@ -468,11 +638,44 @@ int main(int argc, char **argv) { Target.setMCRelaxAll(true); } + + +#if defined __native_client__ && defined(NACL_SRPC) + { + std::string s; + raw_string_ostream ROS(s); + formatted_raw_ostream FOS(ROS); + // Ask the target to add backend passes as necessary. + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify)) { + errs() << argv[0] << ": target does not support generation of this" + << " file type!\n"; + return 1; + } + + if (LazyBitcode || ReduceMemoryFootprint) { + FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get()); + P->doInitialization(); + for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I) { + P->run(*I); + if (ReduceMemoryFootprint) { + I->Dematerialize(); + } + } + P->doFinalization(); + } else { + static_cast<PassManager*>(PM.get())->run(mod); + } + FOS.flush(); + ROS.flush(); + NaClOutputStringToFile(OutputFilename.c_str(), ROS.str()); + } +#else + { formatted_raw_ostream FOS(Out->os()); // Ask the target to add backend passes as necessary. - if (Target.addPassesToEmitFile(PM, FOS, FileType, NoVerify)) { + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify)) { errs() << argv[0] << ": target does not support generation of this" << " file type!\n"; return 1; @@ -481,11 +684,50 @@ int main(int argc, char **argv) { // Before executing passes, print the final values of the LLVM options. cl::PrintOptionValues(); - PM.run(mod); + if (LazyBitcode || ReduceMemoryFootprint) { + FunctionPassManager *P = static_cast<FunctionPassManager*>(PM.get()); + P->doInitialization(); + for (Module::iterator I = mod.begin(), E = mod.end(); I != E; ++I) { + P->run(*I); + if (ReduceMemoryFootprint) { + I->Dematerialize(); + } + } + P->doFinalization(); + } else { + static_cast<PassManager*>(PM.get())->run(mod); + } } // Declare success. Out->keep(); +#endif + + // @LOCALMOD-BEGIN + // Write out the metadata. + // + // We need to ensure that intrinsic prototypes are available, in case + // we have a NeededRecord for one of them. + // They may have been eliminated by the StripDeadPrototypes pass, + // or some other pass that is unaware of NeededRecords / IntrinsicLowering. 
+ IntrinsicLowering IL(*target->getTargetData()); + IL.AddPrototypes(*M); + + if (!MetadataTextFilename.empty()) { + int err = WriteTextMetadataFile(*M.get(), TheTriple); + if (err != 0) + return err; + } + // @LOCALMOD-END return 0; } + +#if !defined(NACL_SRPC) +int +main (int argc, char **argv) { + return llc_main(argc, argv); +} +#else +// main() is in nacl_file.cpp. +#endif diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 6450ea6ac7..a5f68a6418 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -51,6 +51,13 @@ static cl::opt<bool> ShowAnnotations("show-annotations", cl::desc("Add informational comments to the .ll file")); +// @LOCALMOD-BEGIN +// Print bitcode metadata only, in text format. +// (includes output format, soname, and dependencies). +static cl::opt<bool> +DumpMetadata("dump-metadata", cl::desc("Dump bitcode metadata")); +// @LOCALMOD-END + namespace { static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { @@ -156,7 +163,7 @@ int main(int argc, char **argv) { OutputFilename = "-"; if (OutputFilename.empty()) { // Unspecified output, infer it. - if (InputFilename == "-") { + if (InputFilename == "-" || DumpMetadata) { // @LOCALMOD OutputFilename = "-"; } else { const std::string &IFN = InputFilename; @@ -178,6 +185,14 @@ int main(int argc, char **argv) { return 1; } + // @LOCALMOD-BEGIN + if (DumpMetadata) { + M->dumpMeta(Out->os()); + Out->keep(); + return 0; + } + // @LOCALMOD-END + OwningPtr<AssemblyAnnotationWriter> Annotator; if (ShowAnnotations) Annotator.reset(new CommentWriter()); diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index 2ed11c52b2..4ea1a9080d 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -20,6 +20,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Target/TargetData.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" // @LOCALMOD #include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" @@ -47,6 +48,18 @@ Force("f", cl::desc("Enable binary output on terminals")); static cl::opt<bool> DeleteFn("delete", cl::desc("Delete specified Globals from Module")); +// @LOCALMOD-BEGIN +static cl::opt<unsigned> +Divisor("divisor", + cl::init(0), + cl::desc("select GV by position (pos % divisor = remainder)")); + +static cl::opt<unsigned> +Remainder("remainder", + cl::init(0), + cl::desc("select GV by position (pos % divisor = remainder)")); +// @LOCALMOD-END + // ExtractFuncs - The functions to extract from the module. static cl::list<std::string> ExtractFuncs("func", cl::desc("Specify function to extract"), @@ -131,6 +144,24 @@ int main(int argc, char **argv) { } } + // @LOCALMOD-BEGIN + // Extract globals via modulo operation. + size_t count_globals = 0; + if (Divisor != 0) { + size_t pos = 0; + for (Module::global_iterator GV = M->global_begin(), E = M->global_end(); + GV != E; + GV++, pos++) { + if (pos % Divisor == Remainder) { + GVs.insert(&*GV); + } + } + dbgs() << "total globals: " << pos << "\n"; + count_globals = GVs.size(); + dbgs() << "selected globals: " << count_globals << "\n"; + } + // @LOCALMOD-END + // Figure out which functions we should extract. for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { GlobalValue *GV = M->getFunction(ExtractFuncs[i]); @@ -165,6 +196,22 @@ int main(int argc, char **argv) { } } + // @LOCALMOD-BEGIN + // Extract functions via modulo operation. 
+ if (Divisor != 0) { + size_t pos = 0; + for (Module::iterator F = M->begin(), E = M->end(); + F != E; + F++, pos++) { + if (pos % Divisor == Remainder) { + GVs.insert(&*F); + } + } + dbgs() << "total functions: " << pos << "\n"; + dbgs() << "selected functions: " << GVs.size() - count_globals << "\n"; + } + // @LOCALMOD-END + // Materialize requisite global values. if (!DeleteFn) for (size_t i = 0, e = GVs.size(); i != e; ++i) { diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 08139470be..c28f8732b5 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD #include "llvm/Config/config.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" @@ -69,6 +70,16 @@ LTOCodeGenerator::LTOCodeGenerator() InitializeAllTargets(); InitializeAllTargetMCs(); InitializeAllAsmPrinters(); + + // @LOCALMOD-BEGIN + // Preserve symbols which may be referenced due to the lowering + // of an intrinsic. + const llvm::StringSet<> &IntrinsicSymbols = IntrinsicLowering::GetFuncNames(); + for (llvm::StringSet<>::const_iterator it = IntrinsicSymbols.begin(), + ie = IntrinsicSymbols.end(); it != ie; ++it) { + _mustPreserveSymbols[it->getKey().str().c_str()] = 1; + } + // @LOCALMOD-END } LTOCodeGenerator::~LTOCodeGenerator() { @@ -116,6 +127,81 @@ bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model, llvm_unreachable("Unknown PIC model!"); } +// @LOCALMOD-BEGIN +void LTOCodeGenerator::setMergedModuleOutputFormat(lto_output_format format) +{ + Module::OutputFormat outputFormat; + switch (format) { + case LTO_OUTPUT_FORMAT_OBJECT: + outputFormat = Module::ObjectOutputFormat; + break; + case LTO_OUTPUT_FORMAT_SHARED: + outputFormat = Module::SharedOutputFormat; + break; + case LTO_OUTPUT_FORMAT_EXEC: + outputFormat = Module::ExecutableOutputFormat; + break; + } + Module *mergedModule = _linker.getModule(); + mergedModule->setOutputFormat(outputFormat); +} + +void LTOCodeGenerator::setMergedModuleSOName(const char *soname) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->setSOName(soname); +} + +void LTOCodeGenerator::addLibraryDep(const char *lib) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->addLibrary(lib); +} + +void LTOCodeGenerator::wrapSymbol(const char *sym) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->wrapSymbol(sym); +} + +const char* LTOCodeGenerator::setSymbolDefVersion(const char *sym, + const char *ver, + bool is_default) +{ + Module *mergedModule = _linker.getModule(); + GlobalValue *GV = mergedModule->getNamedValue(sym); + if (!GV) { + llvm_unreachable("Invalid global in setSymbolDefVersion"); + } + GV->setVersionDef(ver, is_default); + return strdup(GV->getName().str().c_str()); +} + +const char* LTOCodeGenerator::setSymbolNeeded(const char *sym, + const char *ver, + const char *dynfile) +{ + Module *mergedModule = _linker.getModule(); + GlobalValue *GV = mergedModule->getNamedValue(sym); + if (!GV) { + // Symbol lookup may have failed because this symbol was already + // renamed for versioning. Make sure this is the case. 
+ if (strchr(sym, '@') != NULL || ver == NULL || ver[0] == '\0') { + llvm_unreachable("Unexpected condition in setSymbolNeeded"); + } + std::string NewName = std::string(sym) + "@" + ver; + GV = mergedModule->getNamedValue(NewName); + } + if (!GV) { + // Ignore failures due to unused declarations. + // This caused a failure to build libppruntime.so for glibc. + // TODO(sehr): better document under which circumstances this is needed. + return sym; + } + GV->setNeeded(ver, dynfile); + return strdup(GV->getName().str().c_str()); +} +// @LOCALMOD-END bool LTOCodeGenerator::writeMergedModules(const char *path, std::string &errMsg) { if (determineTarget(errMsg)) diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h index 3081b7dad1..6741946e81 100644 --- a/tools/lto/LTOCodeGenerator.h +++ b/tools/lto/LTOCodeGenerator.h @@ -50,6 +50,16 @@ struct LTOCodeGenerator { } bool writeMergedModules(const char *path, std::string &errMsg); + // @LOCALMOD-BEGIN + void setMergedModuleOutputFormat(lto_output_format format); + void setMergedModuleSOName(const char *soname); + void addLibraryDep(const char *lib); + void wrapSymbol(const char *sym); + const char* setSymbolDefVersion(const char *sym, const char *ver, + bool is_default); + const char* setSymbolNeeded(const char *sym, const char *ver, + const char *dynfile); + // @LOCALMOD-END bool compile_to_file(const char **name, std::string &errMsg); const void *compile(size_t *length, std::string &errMsg); void setCodeGenDebugOptions(const char *opts); diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index 9a7d76832a..2c841dbc52 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -17,6 +17,8 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/IntrinsicLowering.h" // @LOCALMOD + #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" @@ -26,6 +28,7 @@ #include "llvm/MC/SubtargetFeature.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/ErrorHandling.h" // @LOCALMOD #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -128,7 +131,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, } // parse bitcode buffer - OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext(), + OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), // @LOCALMOD &errMsg)); if (!m) { delete buffer; @@ -152,6 +155,13 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, TargetOptions Options; TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr, Options); + + // @LOCALMOD-BEGIN + // Add declarations for functions which may be used by intrinsics. 
+ IntrinsicLowering IL(*target->getTargetData()); + IL.AddPrototypes(*m); + // @LOCALMOD-END + LTOModule *Ret = new LTOModule(m.take(), target); if (Ret->parseSymbols(errMsg)) { delete Ret; @@ -167,6 +177,33 @@ MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) { return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false); } +// @LOCALMOD-BEGIN +lto_output_format LTOModule::getOutputFormat() { + Module::OutputFormat format = _module->getOutputFormat(); + switch (format) { + case Module::ObjectOutputFormat: return LTO_OUTPUT_FORMAT_OBJECT; + case Module::SharedOutputFormat: return LTO_OUTPUT_FORMAT_SHARED; + case Module::ExecutableOutputFormat: return LTO_OUTPUT_FORMAT_EXEC; + } + llvm_unreachable("Unknown output format in LTOModule"); +} + +const char *LTOModule::getSOName() { + return _module->getSOName().c_str(); +} + +const char* LTOModule::getLibraryDep(uint32_t index) { + const Module::LibraryListType &Libs = _module->getLibraries(); + if (index < Libs.size()) + return Libs[index].c_str(); + return NULL; +} + +uint32_t LTOModule::getNumLibraryDeps() { + return _module->getLibraries().size(); +} +// @LOCALMOD-END + /// objcClassNameFromExpression - Get string that the data pointer points to. bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) { if (ConstantExpr *ce = dyn_cast<ConstantExpr>(c)) { @@ -459,6 +496,16 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, bool isFunc) { if (decl->getName().startswith("llvm.")) return; + // @LOCALMOD-BEGIN + // Bitcode modules may have declarations for functions or globals + // which are unused. Ignore them here so that gold does not mistake + // them for undefined symbols. But don't ignore declarations for + // functions which are potentially used by intrinsics. + if (decl->use_empty() && + !IntrinsicLowering::IsCalledByIntrinsic(decl->getName())) + return; + // @LOCALMOD-END + // ignore all aliases if (isa<GlobalAlias>(decl)) return; @@ -635,6 +682,12 @@ namespace { unsigned MaxBytesToEmit) {} virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value ) { return false; } + // @LOCALMOD-BEGIN + virtual void EmitBundleLock() {} + virtual void EmitBundleUnlock() {} + virtual void EmitBundleAlignStart() {} + virtual void EmitBundleAlignEnd() {} + // @LOCALMOD-END virtual void EmitFileDirective(StringRef Filename) {} virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h index cafb927abf..3be5a5210e 100644 --- a/tools/lto/LTOModule.h +++ b/tools/lto/LTOModule.h @@ -98,6 +98,14 @@ public: _module->setTargetTriple(triple); } + // @LOCALMOD-BEGIN + lto_output_format getOutputFormat(); + const char* getSOName(); + const char* getLibraryDep(uint32_t index); + uint32_t getNumLibraryDeps(); + // @LOCALMOD-END + + /// getSymbolCount - Get the number of symbols uint32_t getSymbolCount() { return _symbols.size(); diff --git a/tools/lto/Makefile b/tools/lto/Makefile index 153fa03137..7388ef8778 100644 --- a/tools/lto/Makefile +++ b/tools/lto/Makefile @@ -50,3 +50,11 @@ ifeq ($(HOST_OS),Darwin) -Wl,"@executable_path/../lib/lib$(LIBRARYNAME)$(SHLIBEXT)" endif endif + +#@ LOCALMOD-BEGIN +# This is to fix an upstream bug. It is in the process of being upstreamed. +# This line can be removed after it has been fixed upstream and we've merged. 
+ifneq ($(HOST_OS),Darwin) + LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-soname=$(SharedPrefix)LTO$(SHLIBEXT) +endif +#@ LOCALMOD-END diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index a7e633d14b..e7d3dcfd29 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -15,6 +15,8 @@ #include "llvm-c/lto.h" #include "llvm-c/Core.h" +#include "llvm/Support/CommandLine.h" // @LOCALMOD + #include "LTOModule.h" #include "LTOCodeGenerator.h" @@ -23,6 +25,25 @@ // *** Not thread safe *** static std::string sLastErrorString; +// @LOCALMOD-BEGIN +static std::vector<const char*> lto_options; +extern void lto_add_command_line_option(const char* opt) +{ + // ParseCommandLineOptions() expects argv[0] to be program name. + if (lto_options.empty()) + lto_options.push_back("libLTO"); + + lto_options.push_back(strdup(opt)); +} + +extern void lto_parse_command_line_options() +{ + if ( !lto_options.empty() ) + llvm::cl::ParseCommandLineOptions(lto_options.size(), + const_cast<char **>(&lto_options[0])); +} +// @LOCALMOD-END + /// lto_get_version - Returns a printable string. extern const char* lto_get_version() { return LTOCodeGenerator::getVersionString(); @@ -107,6 +128,45 @@ void lto_module_set_target_triple(lto_module_t mod, const char *triple) { return mod->setTargetTriple(triple); } +// @LOCALMOD-BEGIN + +// +// Get the module format for this module +// +lto_output_format lto_module_get_output_format(lto_module_t mod) +{ + return mod->getOutputFormat(); +} + +// +// Get the module soname +// +const char* lto_module_get_soname(lto_module_t mod) +{ + return mod->getSOName(); +} + +// +// Get the i'th library dependency. +// Returns NULL if i >= lto_module_get_num_library_deps() +// +const char * +lto_module_get_library_dep(lto_module_t mod, unsigned int i) +{ + return mod->getLibraryDep(i); +} + +// +// Return the number of library dependencies of this module. +// +unsigned int +lto_module_get_num_library_deps(lto_module_t mod) +{ + return mod->getNumLibraryDeps(); +} + +// @LOCALMOD-END + /// lto_module_get_num_symbols - Returns the number of symbols in the object /// module. unsigned int lto_module_get_num_symbols(lto_module_t mod) { @@ -183,6 +243,77 @@ void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, cg->addMustPreserveSymbol(symbol); } +// @LOCALMOD-BEGIN + +// +// Set the module format for the merged module +// +void lto_codegen_set_merged_module_output_format(lto_code_gen_t cg, + lto_output_format format) +{ + cg->setMergedModuleOutputFormat(format); +} + +// +// Set the module soname (for shared library bitcode) +// +void lto_codegen_set_merged_module_soname(lto_code_gen_t cg, + const char* soname) +{ + cg->setMergedModuleSOName(soname); +} + +// +// Add a library dependency to the linked bitcode module. +// +void lto_codegen_add_merged_module_library_dep(lto_code_gen_t cg, + const char* soname) +{ + cg->addLibraryDep(soname); +} + +// +// Apply symbol wrapping in the linked bitcode module. +// +void lto_codegen_wrap_symbol_in_merged_module(lto_code_gen_t cg, + const char* sym) { + cg->wrapSymbol(sym); +} + +// +// Set the symbol version of defined symbol 'sym'. +// 'sym' is the name of the GlobalValue, exactly as it is +// in the LLVM module. It may already have a version suffix. +// In that case, this function verifies that the old version +// and new version match. +// Returns a reference to the new name. 
+// +const char * +lto_codegen_set_symbol_def_version(lto_code_gen_t cg, + const char *sym, + const char *version, + bool is_default) { + return cg->setSymbolDefVersion(sym, version, is_default); +} + +// +// Set the symbol version of needed symbol 'sym' from file 'dynfile'. +// 'sym' is the name of the GlobalValue, exactly as it is +// in the LLVM module. It may already have a version suffix. +// In that case, this function verifies that the old version +// and new version match. +// In any case, it adds a NeededRecord entry. +// Returns a reference to the new name. +// +const char* +lto_codegen_set_symbol_needed(lto_code_gen_t cg, + const char *sym, + const char *version, + const char *dynfile) { + return cg->setSymbolNeeded(sym, version, dynfile); +} +// @LOCALMOD-END + /// lto_codegen_write_merged_modules - Writes a new file at the specified path /// that contains the merged contents of all modules added so far. Returns true /// on error (check lto_get_error_message() for details). @@ -212,3 +343,4 @@ bool lto_codegen_compile_to_file(lto_code_gen_t cg, const char **name) { void lto_codegen_debug_options(lto_code_gen_t cg, const char *opt) { cg->setCodeGenDebugOptions(opt); } + diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports index b900bfb594..bb062259b9 100644 --- a/tools/lto/lto.exports +++ b/tools/lto/lto.exports @@ -1,3 +1,5 @@ +lto_add_command_line_option +lto_parse_command_line_options lto_get_error_message lto_get_version lto_module_create @@ -9,6 +11,10 @@ lto_module_get_symbol_attribute lto_module_get_symbol_name lto_module_get_target_triple lto_module_set_target_triple +lto_module_get_output_format +lto_module_get_soname +lto_module_get_library_dep +lto_module_get_num_library_deps lto_module_is_object_file lto_module_is_object_file_for_target lto_module_is_object_file_in_memory @@ -19,6 +25,9 @@ lto_codegen_add_must_preserve_symbol lto_codegen_compile lto_codegen_create lto_codegen_dispose +lto_codegen_set_assembler_args +lto_codegen_set_assembler_path +lto_codegen_set_cpu lto_codegen_set_debug_model lto_codegen_set_pic_model lto_codegen_write_merged_modules @@ -26,6 +35,12 @@ lto_codegen_debug_options lto_codegen_set_assembler_args lto_codegen_set_assembler_path lto_codegen_set_cpu +lto_codegen_set_merged_module_output_format +lto_codegen_set_merged_module_soname +lto_codegen_add_merged_module_library_dep +lto_codegen_set_symbol_def_version +lto_codegen_set_symbol_needed +lto_codegen_wrap_symbol_in_merged_module lto_codegen_compile_to_file LLVMCreateDisasm LLVMDisasmDispose diff --git a/utils/Makefile b/utils/Makefile index ecb30bed7c..d117b5a87f 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -11,6 +11,15 @@ LEVEL = .. 
PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \ count fpcmp llvm-lit not unittest +ifeq ($(NACL_SANDBOX),1) + # In sandboxed mode, just build the bare minimum + # Note: TableGen is usually built twice: + # * once with host compiler + # * also with the "given" compiler + # Here we just disable that second build + PARALLEL_DIRS := +endif + EXTRA_DIST := check-each-file codegen-diff countloc.sh \ DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \ getsrcs.sh llvmdo llvmgrep llvm-native-gcc \ diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp index dfa9526cc9..a6bea14dc1 100644 --- a/utils/TableGen/CodeGenTarget.cpp +++ b/utils/TableGen/CodeGenTarget.cpp @@ -290,6 +290,12 @@ void CodeGenTarget::ComputeInstrsByEnum() const { "REG_SEQUENCE", "COPY", "BUNDLE", + // @LOCALMOD-BEGIN + "BUNDLE_ALIGN_START", + "BUNDLE_ALIGN_END", + "BUNDLE_LOCK", + "BUNDLE_UNLOCK", + // @LOCALMOD-END 0 }; const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions(); diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp index bd98308aea..6fb2feecbc 100644 --- a/utils/TableGen/EDEmitter.cpp +++ b/utils/TableGen/EDEmitter.cpp @@ -272,6 +272,7 @@ static int X86TypeFromOpName(LiteralConstantEmitter *type, REG("RFP32"); REG("GR64"); REG("GR64_NOAX"); + REG("GR32_TC_64"); // @LOCALMOD REG("GR64_TC"); REG("FR64"); REG("VR64");
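
A note on the symbol-versioning scheme used by the gold-plugin localmod above: the patch stops using the ld_plugin_symbol 'version' field and instead carries the version in the symbol name as "sym@VER" or "sym@@VER", with "@@" marking the default version. A minimal standalone sketch of splitting such a name back into its parts; SplitVersionedName is a hypothetical helper written for illustration, not code from this patch:

#include <cstdio>
#include <string>

// Split "sym@VER" / "sym@@VER" into name, version, and a default-version
// flag. Returns false (leaving the outputs untouched) for unversioned names.
static bool SplitVersionedName(const std::string &Sym, std::string &Name,
                               std::string &Version, bool &IsDefault) {
  std::string::size_type At = Sym.find('@');
  if (At == std::string::npos)
    return false;
  IsDefault = At + 1 < Sym.size() && Sym[At + 1] == '@';
  Name = Sym.substr(0, At);
  Version = Sym.substr(At + (IsDefault ? 2 : 1));
  return true;
}

int main() {
  std::string Name, Version;
  bool IsDefault = false;
  if (SplitVersionedName("read@@GLIBC_2.9", Name, Version, IsDefault))
    std::printf("name=%s version=%s default=%d\n",
                Name.c_str(), Version.c_str(), IsDefault ? 1 : 0);
  return 0;
}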
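The lto_add_command_line_option()/lto_parse_command_line_options() pair added in tools/lto/lto.cpp above exists because some flags (the patch comment cites -add-nacl-read-tp-dependency) must take effect before any LTOModule is created, which is too early for lto_codegen_debug_options(). A sketch of a client driving the pair the way llvm_plugin_onload() does; ApplyExtraOptionsEarly is a hypothetical wrapper, while the two extern entry points are the ones the patch exports:

#include <string>
#include <vector>

// Entry points declared by the patched llvm-c/lto.h.
extern "C" {
void lto_add_command_line_option(const char *opt);
void lto_parse_command_line_options();
}

// Queue every extra option, parse them once, then clear the list so the
// same strings are not handed to lto_codegen_debug_options() a second time.
static void ApplyExtraOptionsEarly(std::vector<std::string> &Extra) {
  if (Extra.empty())
    return;
  for (std::vector<std::string>::iterator I = Extra.begin(), E = Extra.end();
       I != E; ++I)
    lto_add_command_line_option(I->c_str());
  lto_parse_command_line_options();
  Extra.clear();
}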
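The -streaming-bitcode and -reduce-memory-footprint paths in tools/llc/llc.cpp above swap the whole-module PassManager for a FunctionPassManager and walk the module one function at a time, optionally dropping each body once it has been emitted. That loop, isolated as a sketch against the LLVM 3.1-era headers this tree uses; runPerFunction is a hypothetical wrapper, not a function in the patch:

#include "llvm/Module.h"
#include "llvm/PassManager.h"

// Compile one function at a time so the whole module never has to be
// materialized at once; Dematerialize() releases a body after codegen.
static void runPerFunction(llvm::Module &Mod,
                           llvm::FunctionPassManager &FPM,
                           bool ReduceMemoryFootprint) {
  FPM.doInitialization();
  for (llvm::Module::iterator I = Mod.begin(), E = Mod.end(); I != E; ++I) {
    FPM.run(*I);
    if (ReduceMemoryFootprint)
      I->Dematerialize();
  }
  FPM.doFinalization();
}

The trade-off, as the option's own comment notes, is that earlier functions can be compiled before later ones are even read, at the cost of whole-module optimization.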
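The -divisor/-remainder options added to tools/llvm-extract above select globals and functions by position: the item at position pos is kept iff pos % divisor == remainder, with divisor 0 disabling the filter. The selection rule as a one-line sketch (SelectedByModulo is a hypothetical name):

#include <cstddef>

// Selection rule used by the llvm-extract localmod's two modulo loops.
static bool SelectedByModulo(size_t Pos, unsigned Divisor, unsigned Remainder) {
  return Divisor != 0 && Pos % Divisor == Remainder;
}

Runs that share a divisor but step through each remainder, e.g. -divisor=4 with -remainder=0 through -remainder=3, should partition a module's globals and functions into disjoint slices, which is handy for bisecting a large module.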