Diffstat (limited to 'lib')
-rw-r--r--  lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp   429
-rw-r--r--  lib/Target/ARM/ARMInstrNaCl.td                     145
-rw-r--r--  lib/Target/ARM/ARMNaClHeaders.cpp                  192
-rw-r--r--  lib/Target/ARM/ARMNaClRewritePass.cpp              755
-rw-r--r--  lib/Target/ARM/ARMNaClRewritePass.h                 36
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp          329
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h             19
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp        261
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h           19
-rw-r--r--  lib/Target/Mips/MipsNaClHeaders.cpp                128
-rw-r--r--  lib/Target/Mips/MipsNaClRewritePass.cpp            333
-rw-r--r--  lib/Target/Mips/MipsNaClRewritePass.h               21
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp          803
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCNaCl.h             19
-rw-r--r--  lib/Target/X86/X86InstrNaCl.td                     433
-rw-r--r--  lib/Target/X86/X86NaClJITInfo.cpp                  393
-rw-r--r--  lib/Target/X86/X86NaClJITInfo.h                     75
-rw-r--r--  lib/Target/X86/X86NaClRewriteFinalPass.cpp         236
-rw-r--r--  lib/Target/X86/X86NaClRewritePass.cpp              869
-rw-r--r--  lib/Wrap/LLVMBuild.txt                              21
-rw-r--r--  lib/Wrap/Makefile                                   14
-rw-r--r--  lib/Wrap/bitcode_wrapperer.cpp                     355
-rw-r--r--  lib/Wrap/file_wrapper_input.cpp                     53
-rw-r--r--  lib/Wrap/file_wrapper_output.cpp                    37
-rw-r--r--  lib/Wrap/wrapper_output.cpp                          9
25 files changed, 5984 insertions, 0 deletions
diff --git a/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
new file mode 100644
index 0000000000..661bb47550
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
@@ -0,0 +1,429 @@
+//===-- NaClJITMemoryManager.cpp - Memory Allocator for JIT'd code --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the NaClJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/NaClJITMemoryManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+#if defined(__linux__) || defined(__native_client__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+using namespace llvm;
+
+#ifdef __native_client__
+// etext is guarded by ifdef so the code still compiles on non-ELF platforms
+extern char etext;
+#endif
+
+// The way NaCl linking is currently set up, there is a gap between the text
+// segment and the rodata segment where we can fill dyncode. The text ends
+// at etext, but there is no symbol for the start of rodata; currently the
+// linker script puts it at 0x11000000.
+// If we run out of space there, we can also allocate below the text segment
+// and keep going downward until we run into code loaded by the dynamic
+// linker. (TODO(dschuff): make that work)
+// For now, just start at etext and go until we hit rodata.
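+// A rough sketch of the assumed layout (only the 64MB offset and the
+// 0x11000000 limit below are actual constants; the rest is illustrative):
+//
+//   [ text ... etext ][ ~64MB gap for glibc etc. ][ AllocatableRegionStart
+//     ... NextCode grows upward ... AllocatableRegionLimit = 0x11000000 ]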
+
+// It's an open issue that lazy jitting is not thread safe (PR5184). However,
+// NaCl's dyncode_create solves exactly this problem, so in the future
+// this allocator could (should?) be made thread safe.
+
+const size_t NaClJITMemoryManager::kStubSlabSize;
+const size_t NaClJITMemoryManager::kDataSlabSize;
+const size_t NaClJITMemoryManager::kCodeSlabSize;
+
+// TODO(dschuff): fix allocation start (etext + 64M is hopefully after where
+// glibc is loaded) and limit (we may need a linker-provided symbol for the
+// start of the IRT or the end of the segment gap)
+// (also fix allocateCodeSlab and maybe allocateStubSlab at that time).
+// What we really need is a usable nacl_dyncode_alloc(), but this could still
+// be improved upon using dl_iterate_phdr.
+static const intptr_t kNaClSegmentGapEnd = 0x11000000;
+
+NaClJITMemoryManager::NaClJITMemoryManager() :
+ AllocatableRegionLimit((uint8_t *)kNaClSegmentGapEnd),
+ NextCode(AllocatableRegionStart), GOTBase(NULL) {
+#ifdef __native_client__
+ AllocatableRegionStart = (uint8_t *)&etext + 1024*1024*64;
+#else
+ assert(false && "NaClJITMemoryManager will not work outside NaCl sandbox");
+#endif
+ AllocatableRegionStart =
+ (uint8_t *)RoundUpToAlignment((uint64_t)AllocatableRegionStart,
+ kBundleSize);
+ NextCode = AllocatableRegionStart;
+
+ // Allocate 1 stub slab to get us started
+ CurrentStubSlab = allocateStubSlab(0);
+ InitFreeList(&CodeFreeListHead);
+ InitFreeList(&DataFreeListHead);
+
+ DEBUG(dbgs() << "NaClJITMemoryManager: AllocatableRegionStart " <<
+ AllocatableRegionStart << " Limit " << AllocatableRegionLimit << "\n");
+}
+
+NaClJITMemoryManager::~NaClJITMemoryManager() {
+ delete [] GOTBase;
+ DestroyFreeList(CodeFreeListHead);
+ DestroyFreeList(DataFreeListHead);
+}
+
+FreeListNode *NaClJITMemoryManager::allocateCodeSlab(size_t MinSize) {
+ FreeListNode *node = new FreeListNode();
+ if (AllocatableRegionLimit - NextCode < (int)kCodeSlabSize) {
+ // TODO(dschuff): might be possible to try the space below text segment?
+ report_fatal_error("Ran out of code space");
+ }
+ node->address = NextCode;
+ node->size = std::max(kCodeSlabSize, MinSize);
+ NextCode += node->size;
+ DEBUG(dbgs() << "allocated code slab " << NextCode - node->size << "-" <<
+ NextCode << "\n");
+ return node;
+}
+
+SimpleSlab NaClJITMemoryManager::allocateStubSlab(size_t MinSize) {
+ SimpleSlab s;
+ DEBUG(dbgs() << "allocateStubSlab: ");
+ // It's a little weird to just allocate and throw away the FreeListNode, but
+ // since code region allocation is still a bit ugly and magical, I decided
+ // it's better to reuse allocateCodeSlab than duplicate the logic.
+ FreeListNode *n = allocateCodeSlab(MinSize);
+ s.address = n->address;
+ s.size = n->size;
+ s.next_free = n->address;
+ delete n;
+ return s;
+}
+
+FreeListNode *NaClJITMemoryManager::allocateDataSlab(size_t MinSize) {
+ FreeListNode *node = new FreeListNode;
+ size_t size = std::max(kDataSlabSize, MinSize);
+ node->address = (uint8_t*)DataAllocator.Allocate(size, kBundleSize);
+ node->size = size;
+ return node;
+}
+
+void NaClJITMemoryManager::InitFreeList(FreeListNode **Head) {
+ // Make sure there is always at least one entry in the free list
+ *Head = new FreeListNode;
+ (*Head)->Next = (*Head)->Prev = *Head;
+ (*Head)->size = 0;
+}
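+// Design note: the head is a size-0 sentinel in a circular doubly-linked
+// list, so insertion and traversal need no NULL checks; an empty list is
+// simply Head->Next == Head.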
+
+void NaClJITMemoryManager::DestroyFreeList(FreeListNode *Head) {
+ FreeListNode *n = Head->Next;
+ while(n != Head) {
+ FreeListNode *next = n->Next;
+ delete n;
+ n = next;
+ }
+ delete Head;
+}
+
+FreeListNode *NaClJITMemoryManager::FreeListAllocate(uintptr_t &ActualSize,
+ FreeListNode *Head,
+ FreeListNode * (NaClJITMemoryManager::*allocate)(size_t)) {
+ FreeListNode *candidateBlock = Head;
+ FreeListNode *iter = Head->Next;
+
+ uintptr_t largest = candidateBlock->size;
+ // Search for the largest free block
+ while (iter != Head) {
+ if (iter->size > largest) {
+ largest = iter->size;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ if (largest < ActualSize || largest == 0) {
+ candidateBlock = (this->*allocate)(ActualSize);
+ } else {
+ candidateBlock->RemoveFromFreeList();
+ }
+ return candidateBlock;
+}
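+// Note that the search above implements a worst-fit policy: the largest free
+// block is taken, and only if even that block is too small (or the list is
+// empty) is a fresh slab obtained through the supplied allocate callback.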
+
+void NaClJITMemoryManager::FreeListFinishAllocation(FreeListNode *Block,
+ FreeListNode *Head, uint8_t *AllocationStart, uint8_t *AllocationEnd,
+ AllocationTable &Table) {
+ assert(AllocationEnd > AllocationStart);
+ assert(Block->address == AllocationStart);
+ uint8_t *End = (uint8_t *)RoundUpToAlignment((uint64_t)AllocationEnd,
+ kBundleSize);
+ assert(End <= Block->address + Block->size);
+ int AllocationSize = End - Block->address;
+ Table[AllocationStart] = AllocationSize;
+
+ Block->size -= AllocationSize;
+  if (Block->size >= kBundleSize * 2) { // TODO(dschuff): better heuristic?
+ Block->address = End;
+ Block->AddToFreeList(Head);
+ } else {
+ delete Block;
+ }
+ DEBUG(dbgs()<<"FinishAllocation size "<< AllocationSize <<" end "<<End<<"\n");
+}
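+// Worked example (assuming a 16-byte kBundleSize for illustration): a
+// 100-byte allocation cut from a 256-byte block has its end rounded up to
+// 112 bytes; the remaining 144 bytes (>= 2 bundles) go back on the free list.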
+
+void NaClJITMemoryManager::FreeListDeallocate(FreeListNode *Head,
+ AllocationTable &Table,
+ void *Body) {
+ uint8_t *Allocation = (uint8_t *)Body;
+ DEBUG(dbgs() << "deallocating "<<Body<<" ");
+ assert(Table.count(Allocation) && "FreeList Deallocation not found in table");
+ FreeListNode *Block = new FreeListNode;
+ Block->address = Allocation;
+ Block->size = Table[Allocation];
+ Block->AddToFreeList(Head);
+ DEBUG(dbgs() << "deallocated "<< Allocation<< " size " << Block->size <<"\n");
+}
+
+uint8_t *NaClJITMemoryManager::startFunctionBody(const Function *F,
+ uintptr_t &ActualSize) {
+ CurrentCodeBlock = FreeListAllocate(ActualSize, CodeFreeListHead,
+ &NaClJITMemoryManager::allocateCodeSlab);
+ DEBUG(dbgs() << "startFunctionBody CurrentBlock " << CurrentCodeBlock <<
+ " addr " << CurrentCodeBlock->address << "\n");
+ ActualSize = CurrentCodeBlock->size;
+ return CurrentCodeBlock->address;
+}
+
+void NaClJITMemoryManager::endFunctionBody(const Function *F,
+ uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ DEBUG(dbgs() << "endFunctionBody ");
+ FreeListFinishAllocation(CurrentCodeBlock, CodeFreeListHead,
+ FunctionStart, FunctionEnd, AllocatedFunctions);
+}
+
+uint8_t *NaClJITMemoryManager::allocateCodeSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ llvm_unreachable("Implement me! (or don't.)");
+}
+
+uint8_t *NaClJITMemoryManager::allocateDataSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ return (uint8_t *)DataAllocator.Allocate(Size, Alignment);
+}
+
+void NaClJITMemoryManager::deallocateFunctionBody(void *Body) {
+ DEBUG(dbgs() << "deallocateFunctionBody, ");
+ if (Body) FreeListDeallocate(CodeFreeListHead, AllocatedFunctions, Body);
+}
+
+uint8_t *NaClJITMemoryManager::allocateStub(const GlobalValue* F,
+ unsigned StubSize,
+ unsigned Alignment) {
+ uint8_t *StartAddress = (uint8_t *)(uintptr_t)
+ RoundUpToAlignment((uintptr_t)CurrentStubSlab.next_free, Alignment);
+ if (StartAddress + StubSize >
+ CurrentStubSlab.address + CurrentStubSlab.size) {
+ CurrentStubSlab = allocateStubSlab(kStubSlabSize);
+ StartAddress = (uint8_t *)(uintptr_t)
+ RoundUpToAlignment((uintptr_t)CurrentStubSlab.next_free, Alignment);
+ }
+ CurrentStubSlab.next_free = StartAddress + StubSize;
+ DEBUG(dbgs() <<"allocated stub "<<StartAddress<< " size "<<StubSize<<"\n");
+ return StartAddress;
+}
+
+uint8_t *NaClJITMemoryManager::allocateSpace(intptr_t Size,
+ unsigned Alignment) {
+ uint8_t *r = (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ DEBUG(dbgs() << "allocateSpace " << Size <<"/"<<Alignment<<" ret "<<r<<"\n");
+ return r;
+}
+
+uint8_t *NaClJITMemoryManager::allocateGlobal(uintptr_t Size,
+ unsigned Alignment) {
+ uint8_t *r = (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ DEBUG(dbgs() << "allocateGlobal " << Size <<"/"<<Alignment<<" ret "<<r<<"\n");
+ return r;
+}
+
+uint8_t* NaClJITMemoryManager::startExceptionTable(const Function* F,
+ uintptr_t &ActualSize) {
+ CurrentDataBlock = FreeListAllocate(ActualSize, DataFreeListHead,
+ &NaClJITMemoryManager::allocateDataSlab);
+ DEBUG(dbgs() << "startExceptionTable CurrentBlock " << CurrentDataBlock <<
+ " addr " << CurrentDataBlock->address << "\n");
+ ActualSize = CurrentDataBlock->size;
+ return CurrentDataBlock->address;
+}
+
+void NaClJITMemoryManager::endExceptionTable(const Function *F,
+ uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ DEBUG(dbgs() << "endExceptionTable ");
+ FreeListFinishAllocation(CurrentDataBlock, DataFreeListHead,
+ TableStart, TableEnd, AllocatedTables);
+}
+
+void NaClJITMemoryManager::deallocateExceptionTable(void *ET) {
+ DEBUG(dbgs() << "deallocateExceptionTable, ");
+ if (ET) FreeListDeallocate(DataFreeListHead, AllocatedTables, ET);
+}
+
+// Copy of DefaultJITMemoryManager's implementation
+void NaClJITMemoryManager::AllocateGOT() {
+  assert(GOTBase == 0 && "Cannot allocate the GOT multiple times");
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
+ HasGOT = true;
+}
+
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+// This code is pasted directly from r153607 of JITMemoryManager.cpp and has
+// never been tested. It most likely doesn't work inside the sandbox.
+//===----------------------------------------------------------------------===//
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *NaClJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+  // We should not invoke parent's ctors/dtors from generated main()!
+  // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+  // callee's (e.g. tools/lli's) copy, invoking the wrong, duplicated ctors
+  // (and registering the wrong dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+  // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
diff --git a/lib/Target/ARM/ARMInstrNaCl.td b/lib/Target/ARM/ARMInstrNaCl.td
new file mode 100644
index 0000000000..c884cd0fe4
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrNaCl.td
@@ -0,0 +1,145 @@
+//====- ARMInstrNaCl.td - Describe NaCl Instructions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the modifications to the ARM instruction set needed for
+// Native Client code generation.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+//
+// Native Client Pseudo-Instructions
+//
+// These instructions implement the Native Client pseudo-instructions, such
+// as nacljmp and naclasp.
+//
+// TableGen and MC consider these to be "real" instructions. They can be
+// parsed by the AsmParser and emitted by the AsmStreamer as if they
+// were just regular instructions. They are not marked "Pseudo" because
+// this would imply isCodeGenOnly=1, which would stop them from being
+// parsed by the assembler.
+//
+// These instructions cannot be encoded (written into an object file) by the
+// MCCodeEmitter. Instead, during direct object emission, they get lowered to
+// a sequence of streamer emits. (see ARMMCNaCl.cpp)
+//
+// These instructions should not be used in CodeGen. They have no pattern
+// and lack CodeGen metadata. Instead, the ARMNaClRewritePass should
+// generate these instructions after CodeGen is finished.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM Native Client "Pseudo" Instructions
+//===----------------------------------------------------------------------===//
+
+// It does not seem possible to define a single base class for both the
+// synthetic isCodeGenOnly=1 instructions as well as the isAsmParserOnly=1
+// versions.
+
+// These are the fixed flags:
+// AddrMode am = AddrModeNone
+// SizeFlagVal sz = SizeSpecial
+// IndexMode im = IndexModeNone
+// Domain d = GenericDomain
+// InstrItinClass = NoItinerary
+
+// The non-fixed flags need their own class
+// InOperandList = !con(iops, (ins pred:$p)) or left alone
+// AsmString = !strconcat(opc, "${p}", asm) or left alone
+// Format f = MiscFrm/Pseudo
+// isPredicable = 0/1
+
+/// However, it is possible to make a set of two base classes for the
+/// isAsmParserOnly=1 synthetic instructions.
+
+
+/***** FIXME: ADD in isAsmParserOnly naclguard instructions ***************************
+/// required args:
+// dag outs, dag ins, string opc, string asm, string cstr, Format f, list<dag> pat
+
+class NaClSI<dag outs, dag ins, string opc, string asm, string cstr>
+ : I<outs, ins, AddrModeNone, SizeSpecial, IndexModeNone, MiscFrm,
+ NoItinerary, opc, asm, cstr, pat>, Requires<[IsNaCl]>;
+
+class NaClSINoP<dag outs, dag ins, string opc, string asm, string cstr>
+ : InoP <outs, ins, AddrModeNone, SizeSpecial, IndexModeNone, MiscFrm,
+ NoItinerary, opc, asm, cstr, pat>, Requires<[IsNaCl]>;
+
+class NaClSI<dag outs, dag ins, string opc, string asm, string cstr, Format f, list<dag> pat>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, f,
+ GenericDomain, cstr, NoItinerary>, Requires<[IsNaCl]> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let AsmString = !strconcat(opc, asm);
+};
+
+
+/// For instructions that cannot be pseudo-instructions
+class NaClSINoP<dag outs, dag ins, string opc, string asm, string cstr, Format f, list<dag> pat>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, f,
+ GenericDomain, cstr, NoItinerary>, Requires<[IsNaCl]> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let AsmString = !strconcat(opc, asm);
+};
+
+/// This is the guarded isCodeGenOnly pseudo instruction for BX_RET
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isCodeGenOnly = 1,
+    // Make sure this is selected in lieu of the plain BX_RET.
+ AddedComplexity = 1
+ in {
+ // ARMV4T and above
+ def NACL_CG_BX_RET :
+ ARMPseudoInst<(outs), (ins), BrMiscFrm, IIC_Br,
+ "naclbx", "\tlr", [(ARMretflag)]>,
+ Requires<[HasV4T, IsNaCl]> {
+ }
+}
+
+
+// These are assembler only instructions
+let isAsmParserOnly = 1 in {
+ def NACL_GUARD_LOADSTORE :
+ NaClSI<(outs GPR:$dst), (ins GPR:$a),
+ "naclguard", "\t${dst}, ${a}", "" []>;
+
+  let Defs = [CPSR] in
+  def NACL_GUARD_LOADSTORE_TST :
+    PseudoInst<(outs GPR:$dst), (ins GPR:$a), NoItinerary, []>;
+
+ def NACL_GUARD_INDIRECT_CALL :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>;
+
+ def NACL_GUARD_INDIRECT_JMP :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>;
+
+ def NACL_GUARD_CALL :
+ PseudoInst<(outs), (ins pred:$p), NoItinerary, []>;
+
+ // NOTE: the BX_RET instruction hardcodes lr as well
+ def NACL_GUARD_RETURN :
+ PseudoInst<(outs), (ins pred:$p), NoItinerary, []>;
+
+ // Note: intention is that $src and $dst are the same register.
+ def NACL_DATA_MASK :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>;
+}
+
+
+**************************************************************************/
diff --git a/lib/Target/ARM/ARMNaClHeaders.cpp b/lib/Target/ARM/ARMNaClHeaders.cpp
new file mode 100644
index 0000000000..781702158a
--- /dev/null
+++ b/lib/Target/ARM/ARMNaClHeaders.cpp
@@ -0,0 +1,192 @@
+//===-- ARMNaClHeaders.cpp - Print SFI headers to an ARM .s file -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the initial header string needed
+// for the Native Client target in ARM assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "ARMNaClRewritePass.h"
+#include <string>
+
+using namespace llvm;
+
+void EmitSFIHeaders(raw_ostream &O) {
+ O << " @ ========================================\n";
+ O << "@ Branch: " << FlagSfiBranch << "\n";
+ O << "@ Stack: " << FlagSfiStack << "\n";
+ O << "@ Store: " << FlagSfiStore << "\n";
+ O << "@ Data: " << FlagSfiData << "\n";
+
+ O << " @ ========================================\n";
+  // NOTE: this macro does bundle alignment as follows:
+  // if the current bundle position is X, emit pX data items of value "val".
+  // Note that pos will be one of 0, 4, 8, 12.
+ //
+ O <<
+ "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n"
+ "\t.set pos, (. - XmagicX) % 16\n"
+ "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_illegal_if_at_bundle_begining\n"
+ "\tsfi_long_based_on_pos 1 0 0 0 0xe1277777\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nop_if_at_bundle_end\n"
+ "\tsfi_long_based_on_pos 0 0 0 1 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot3\n"
+ "\tsfi_long_based_on_pos 3 2 1 0 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot2\n"
+ "\tsfi_long_based_on_pos 2 1 0 3 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot1\n"
+ "\tsfi_long_based_on_pos 1 0 3 2 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O << " @ ========================================\n";
+ if (FlagSfiZeroMask) {
+    // This mode sets all masks to zero, which turns them into nops.
+    // This is useful for linking this code against non-sandboxed code
+    // for debugging purposes.
+ O <<
+ "\t.macro sfi_data_mask reg cond\n"
+ "\tbic\\cond \\reg, \\reg, #0\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_data_tst reg\n"
+ "\ttst \\reg, #0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_code_mask reg cond=\n"
+ "\tbic\\cond \\reg, \\reg, #0\n"
+ "\t.endm\n"
+ "\n\n";
+
+ } else {
+ O <<
+ "\t.macro sfi_data_mask reg cond\n"
+ "\tbic\\cond \\reg, \\reg, #0xc0000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_data_tst reg\n"
+ "\ttst \\reg, #0xc0000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_code_mask reg cond=\n"
+ "\tbic\\cond \\reg, \\reg, #0xc000000f\n"
+ "\t.endm\n"
+ "\n\n";
+ }
+
+ O << " @ ========================================\n";
+ if (FlagSfiBranch) {
+ O <<
+ "\t.macro sfi_call_preamble cond=\n"
+ "\tsfi_nops_to_force_slot3\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_return_preamble reg cond=\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\reg \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ // This is used just before "bx rx"
+ O <<
+ "\t.macro sfi_indirect_jump_preamble link cond=\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\link \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+  // This is used just before "blx rx"
+ O <<
+ "\t.macro sfi_indirect_call_preamble link cond=\n"
+ "\tsfi_nops_to_force_slot2\n"
+ "\tsfi_code_mask \\link \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ }
+
+ if (FlagSfiStore) {
+ O << " @ ========================================\n";
+
+ O <<
+ "\t.macro sfi_load_store_preamble reg cond\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_data_mask \\reg, \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_cstore_preamble reg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_data_tst \\reg\n"
+ "\t.endm\n"
+ "\n\n";
+ } else {
+ O <<
+ "\t.macro sfi_load_store_preamble reg cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_cstore_preamble reg cond\n"
+ "\t.endm\n"
+ "\n\n";
+ }
+
+ const char* kPreds[] = {
+ "eq",
+ "ne",
+ "lt",
+ "le",
+ "ls",
+ "ge",
+ "gt",
+ "hs",
+ "hi",
+ "lo",
+ "mi",
+ "pl",
+ NULL,
+ };
+
+ O << " @ ========================================\n";
+ O << "\t.text\n";
+}
diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp
new file mode 100644
index 0000000000..91087aaaa2
--- /dev/null
+++ b/lib/Target/ARM/ARMNaClRewritePass.cpp
@@ -0,0 +1,755 @@
+//===-- ARMNaClRewritePass.cpp - Native Client Rewrite Pass ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Native Client Rewrite Pass
+// This final pass inserts the sandboxing instructions needed to run inside
+// the Native Client sandbox. Native Client requires certain software fault
+// isolation (SFI) constructions to be put in place, to prevent escape from
+// the sandbox. Native Client refuses to execute binaries without the correct
+// SFI sequences.
+//
+// Potentially dangerous operations which are protected include:
+// * Stores
+// * Branches
+// * Changes to SP
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-sfi"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMNaClRewritePass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+#include <set>
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace llvm {
+
+cl::opt<bool>
+FlagSfiData("sfi-data", cl::desc("use illegal at data bundle beginning"));
+
+cl::opt<bool>
+FlagSfiLoad("sfi-load", cl::desc("enable sandboxing for load"));
+
+cl::opt<bool>
+FlagSfiStore("sfi-store", cl::desc("enable sandboxing for stores"));
+
+cl::opt<bool>
+FlagSfiStack("sfi-stack", cl::desc("enable sandboxing for stack changes"));
+
+cl::opt<bool>
+FlagSfiBranch("sfi-branch", cl::desc("enable sandboxing for branches"));
+
+}
+
+namespace {
+ class ARMNaClRewritePass : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMNaClRewritePass() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM Native Client Rewrite Pass";
+ }
+
+ private:
+
+ bool SandboxMemoryReferencesInBlock(MachineBasicBlock &MBB);
+ void SandboxMemory(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx,
+ bool CPSRLive,
+ bool IsLoad);
+ bool TryPredicating(MachineInstr &MI, ARMCC::CondCodes);
+
+ bool SandboxBranchesInBlock(MachineBasicBlock &MBB);
+ bool SandboxStackChangesInBlock(MachineBasicBlock &MBB);
+
+ void SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ void LightweightVerify(MachineFunction &MF);
+ };
+ char ARMNaClRewritePass::ID = 0;
+}
+
+static bool IsReturn(const MachineInstr &MI) {
+ return (MI.getOpcode() == ARM::BX_RET);
+}
+
+static bool IsIndirectJump(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case ARM::BX:
+ case ARM::TAILJMPr:
+ return true;
+ }
+}
+
+static bool IsIndirectCall(const MachineInstr &MI) {
+ return MI.getOpcode() == ARM::BLX;
+}
+
+static bool IsDirectCall(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case ARM::BL:
+ case ARM::BL_pred:
+ case ARM::TPsoft:
+ return true;
+ }
+}
+
+static bool IsCPSRLiveOut(const MachineBasicBlock &MBB) {
+ // CPSR is live-out if any successor lists it as live-in.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end();
+ SI != E;
+ ++SI) {
+ const MachineBasicBlock *Succ = *SI;
+ if (Succ->isLiveIn(ARM::CPSR)) return true;
+ }
+ return false;
+}
+
+static void DumpInstructionVerbose(const MachineInstr &MI) {
+ dbgs() << MI;
+ dbgs() << MI.getNumOperands() << " operands:" << "\n";
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand& op = MI.getOperand(i);
+ dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n";
+ }
+ dbgs() << "\n";
+}
+
+static void DumpBasicBlockVerbose(const MachineBasicBlock &MBB) {
+ dbgs() << "\n<<<<< DUMP BASIC BLOCK START\n";
+ for (MachineBasicBlock::const_iterator MBBI = MBB.begin(), MBBE = MBB.end();
+ MBBI != MBBE;
+ ++MBBI) {
+ DumpInstructionVerbose(*MBBI);
+ }
+ dbgs() << "<<<<< DUMP BASIC BLOCK END\n\n";
+}
+
+static void DumpBasicBlockVerboseCond(const MachineBasicBlock &MBB, bool b) {
+ if (b) {
+ DumpBasicBlockVerbose(MBB);
+ }
+}
+
+/**********************************************************************/
+/* Exported functions */
+
+namespace ARM_SFI {
+
+bool IsStackChange(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+ return MI.modifiesRegister(ARM::SP, TRI);
+}
+
+bool NextInstrMasksSP(const MachineInstr &MI) {
+ MachineBasicBlock::const_iterator It = &MI;
+ const MachineBasicBlock *MBB = MI.getParent();
+
+ MachineBasicBlock::const_iterator next = ++It;
+ if (next == MBB->end()) {
+ return false;
+ }
+
+ const MachineInstr &next_instr = *next;
+ unsigned opcode = next_instr.getOpcode();
+ return (opcode == ARM::SFI_DATA_MASK) &&
+ (next_instr.getOperand(0).getReg() == ARM::SP);
+}
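+// For example, "add sp, sp, r0" immediately followed by an SFI_DATA_MASK of
+// SP counts as already sandboxed; MC lowering later fuses the pair into a
+// single bundle (see EmitDataMask in ARMMCNaCl.cpp).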
+
+bool IsSandboxedStackChange(const MachineInstr &MI) {
+ // Calls do not change the stack on ARM but they have implicit-defs, so
+ // make sure they do not get sandboxed.
+ if (MI.getDesc().isCall())
+ return true;
+
+ unsigned opcode = MI.getOpcode();
+ switch (opcode) {
+ default: break;
+
+ // These just bump SP by a little (and access the stack),
+ // so that is okay due to guard pages.
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ return true;
+
+ // Similar, unless it is a load into SP...
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD: {
+ bool dest_SP = false;
+ // Dest regs start at operand index 4.
+ for (unsigned i = 4; i < MI.getNumOperands(); ++i) {
+ const MachineOperand &DestReg = MI.getOperand(i);
+ dest_SP = dest_SP || (DestReg.getReg() == ARM::SP);
+ }
+ if (dest_SP) {
+ break;
+ }
+ return true;
+ }
+
+ // Some localmods *should* prevent selecting a reg offset
+ // (see SelectAddrMode2 in ARMISelDAGToDAG.cpp).
+ // Otherwise, the store is already a potential violation.
+ case ARM::STR_PRE_REG:
+ case ARM::STR_PRE_IMM:
+
+ case ARM::STRH_PRE:
+
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_PRE_IMM:
+ return true;
+
+ // Similar, unless it is a load into SP...
+ case ARM::LDRi12:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRH_PRE:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSB_PRE: {
+ const MachineOperand &DestReg = MI.getOperand(0);
+ if (DestReg.getReg() == ARM::SP) {
+ break;
+ }
+ return true;
+ }
+
+ // Here, if SP is the base / write-back reg, we need to check if
+ // a reg is used as offset (otherwise it is not a small nudge).
+ case ARM::STR_POST_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STRH_POST:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_POST_IMM: {
+ const MachineOperand &WBReg = MI.getOperand(0);
+ const MachineOperand &OffReg = MI.getOperand(3);
+ if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) {
+ break;
+ }
+ return true;
+ }
+
+ // Similar, but also check that DestReg is not SP.
+ case ARM::LDR_POST_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB_POST: {
+ const MachineOperand &DestReg = MI.getOperand(0);
+ if (DestReg.getReg() == ARM::SP) {
+ break;
+ }
+ const MachineOperand &WBReg = MI.getOperand(1);
+ const MachineOperand &OffReg = MI.getOperand(3);
+ if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) {
+ break;
+ }
+ return true;
+ }
+ }
+
+ return (NextInstrMasksSP(MI));
+}
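+// For example, "push {r4, lr}" (an STMDB_UPD with SP as base) only nudges SP
+// and is considered safe thanks to guard pages, whereas "mov sp, r0" can move
+// SP arbitrarily and therefore needs a following mask.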
+
+bool NeedSandboxStackChange(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ return (IsStackChange(MI, TRI) && !IsSandboxedStackChange(MI));
+}
+
+} // namespace ARM_SFI
+
+/**********************************************************************/
+
+void ARMNaClRewritePass::getAnalysisUsage(AnalysisUsage &AU) const {
+ // Slight (possibly unnecessary) efficiency tweak:
+ // Promise not to modify the CFG.
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/*
+ * A primitive validator to catch problems at compile time.
+ * E.g., it could be used along with bugpoint to reduce a bitcode file.
+ */
+void ARMNaClRewritePass::LightweightVerify(MachineFunction &MF) {
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
+ MBBI != MBBE;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) {
+ dbgs() << "LightWeightVerify for function: "
+ << MF.getFunction()->getName() << " (BAD STACK CHANGE)\n";
+ DumpInstructionVerbose(MI);
+ DumpBasicBlockVerbose(MBB);
+ // assert(false && "LightweightVerify Failed");
+ }
+ }
+ }
+}
+
+void ARMNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ // (1) Ensure there is room in the bundle for a data mask instruction
+ // (nop'ing to the next bundle if needed).
+ // (2) Do a data mask on SP after the instruction that updated SP.
+ MachineInstr &MI = *MBBI;
+
+ // Use same predicate as current instruction.
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_NOP_IF_AT_BUNDLE_END));
+
+  // Step past the original instruction: the first post-increment returns the
+  // position of MI itself, the second the position just after it.
+ MachineBasicBlock::iterator MBBINext = (MBBI++);
+ MachineBasicBlock::iterator MBBINext2 = (MBBI++);
+
+ BuildMI(MBB, MBBINext2, MI.getDebugLoc(),
+ TII->get(ARM::SFI_DATA_MASK))
+ .addReg(ARM::SP) // modify SP (as dst)
+ .addReg(ARM::SP) // start with SP (as src)
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+}
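+// After MC lowering, the intended result is, e.g.:
+//   <SP-updating instruction>
+//   bic sp, sp, #0xC0000000
+// locked into a single bundle (see EmitDataMask in ARMMCNaCl.cpp).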
+
+bool ARMNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) {
+ SandboxStackChange(MBB, MBBI);
+ Modified |= true;
+ }
+ }
+ return Modified;
+}
+
+bool ARMNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ if (IsReturn(MI)) {
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_RETURN))
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+
+ if (IsIndirectJump(MI)) {
+ MachineOperand &Addr = MI.getOperand(0);
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_INDIRECT_JMP))
+ .addOperand(Addr) // rD
+ .addReg(0) // apparently unused source register?
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+
+ if (IsDirectCall(MI)) {
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_CALL))
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+
+ if (IsIndirectCall(MI)) {
+ MachineOperand &Addr = MI.getOperand(0);
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_INDIRECT_CALL))
+ .addOperand(Addr) // rD
+ .addReg(0) // apparently unused source register?
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+bool ARMNaClRewritePass::TryPredicating(MachineInstr &MI, ARMCC::CondCodes Pred) {
+ // Can't predicate if it's already predicated.
+ // TODO(cbiffle): actually we can, if the conditions match.
+ if (TII->isPredicated(&MI)) return false;
+
+ /*
+ * ARM predicate operands use two actual MachineOperands: an immediate
+ * holding the predicate condition, and a register referencing the flags.
+ */
+ SmallVector<MachineOperand, 2> PredOperands;
+ PredOperands.push_back(MachineOperand::CreateImm((int64_t) Pred));
+ PredOperands.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
+
+ // This attempts to rewrite, but some instructions can't be predicated.
+ return TII->PredicateInstruction(&MI, PredOperands);
+}
+
+static bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 0...
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMSIA:
+ *AddrIdx = 0;
+ break;
+ // Instructions with base address register in position 1...
+ case ARM::LDMIA_UPD: // same reg at position 0 and position 1
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+
+ case ARM::LDRSB:
+ case ARM::LDRH:
+ case ARM::LDRSH:
+
+ case ARM::LDRi12:
+ case ARM::LDRrs:
+ case ARM::LDRBi12:
+ case ARM::LDRBrs:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VLDRS:
+ case ARM::VLDRD:
+
+ case ARM::LDREX:
+ case ARM::LDREXB:
+ case ARM::LDREXH:
+ *AddrIdx = 1;
+ break;
+
+ // Instructions with base address register in position 2...
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDR_POST_IMM:
+
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+
+ case ARM::LDRD:
+ *AddrIdx = 2;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Sandboxes a memory reference instruction by inserting an appropriate mask
+ * or check operation before it.
+ */
+void ARMNaClRewritePass::SandboxMemory(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx,
+ bool CPSRLive,
+ bool IsLoad) {
+ MachineOperand &Addr = MI.getOperand(AddrIdx);
+
+ if (!CPSRLive && TryPredicating(MI, ARMCC::EQ)) {
+ /*
+ * For unconditional memory references where CPSR is not in use, we can use
+ * a faster sandboxing sequence by predicating the load/store -- assuming we
+ * *can* predicate the load/store.
+ */
+
+ // TODO(sehr): add SFI_GUARD_SP_LOAD_TST.
+ // Instruction can be predicated -- use the new sandbox.
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_LOADSTORE_TST))
+ .addOperand(Addr) // rD
+ .addReg(0); // apparently unused source register?
+ } else {
+ unsigned Opcode;
+ if (IsLoad && (MI.getOperand(0).getReg() == ARM::SP)) {
+ Opcode = ARM::SFI_GUARD_SP_LOAD;
+ } else {
+ Opcode = ARM::SFI_GUARD_LOADSTORE;
+ }
+ // Use the older BIC sandbox, which is universal, but incurs a stall.
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
+ .addOperand(Addr) // rD
+ .addReg(0) // apparently unused source register?
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+
+ /*
+ * This pseudo-instruction is intended to generate something resembling the
+ * following, but with alignment enforced.
+ * TODO(cbiffle): move alignment into this function, use the code below.
+ *
+ * // bic<cc> Addr, Addr, #0xC0000000
+ * BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ * TII->get(ARM::BICri))
+ * .addOperand(Addr) // rD
+ * .addOperand(Addr) // rN
+ * .addImm(0xC0000000) // imm
+ * .addImm((int64_t) Pred) // predicate condition
+ * .addReg(ARM::CPSR) // predicate source register (CPSR)
+ * .addReg(0); // flag output register (0 == no flags)
+ */
+ }
+}
+
+static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 0...
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMSIA:
+ *AddrIdx = 0;
+ break;
+
+ // Instructions with base address register in position 1...
+ case ARM::STMIA_UPD: // same reg at position 0 and position 1
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+
+ case ARM::STRH:
+ case ARM::STRi12:
+ case ARM::STRrs:
+ case ARM::STRBi12:
+ case ARM::STRBrs:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VSTRS:
+ case ARM::VSTRD:
+ *AddrIdx = 1;
+ break;
+
+ // Instructions with base address register in position 2...
+ case ARM::STR_PRE_REG:
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_POST_IMM:
+
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_POST_IMM:
+
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+
+
+ case ARM::STRD:
+ case ARM::STREX:
+ case ARM::STREXB:
+ case ARM::STREXH:
+ *AddrIdx = 2;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+bool ARMNaClRewritePass::SandboxMemoryReferencesInBlock(
+ MachineBasicBlock &MBB) {
+ /*
+ * This is a simple local reverse-dataflow analysis to determine where CPSR
+ * is live. We cannot use the conditional store sequence anywhere that CPSR
+ * is live, or we'd affect correctness. The existing liveness analysis passes
+ * barf when applied pre-emit, after allocation, so we must do it ourselves.
+ */
+
+ // LOCALMOD(pdox): Short-circuit this function. Assume CPSR is always live,
+ // until we figure out why the assert is tripping.
+ bool Modified2 = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) {
+ bool CPSRLive = true;
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, true);
+ Modified2 = true;
+ }
+ if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) {
+ bool CPSRLive = true;
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, false);
+ Modified2 = true;
+ }
+ }
+ return Modified2;
+ // END LOCALMOD(pdox)
+
+ bool CPSRLive = IsCPSRLiveOut(MBB);
+
+ // Given that, record which instructions should not be altered to trash CPSR:
+ std::set<const MachineInstr *> InstrsWhereCPSRLives;
+ for (MachineBasicBlock::const_reverse_iterator MBBI = MBB.rbegin(),
+ E = MBB.rend();
+ MBBI != E;
+ ++MBBI) {
+ const MachineInstr &MI = *MBBI;
+ // Check for kills first.
+ if (MI.modifiesRegister(ARM::CPSR, TRI)) CPSRLive = false;
+ // Then check for uses.
+ if (MI.readsRegister(ARM::CPSR)) CPSRLive = true;
+
+ if (CPSRLive) InstrsWhereCPSRLives.insert(&MI);
+ }
+
+ // Sanity check:
+ assert(CPSRLive == MBB.isLiveIn(ARM::CPSR)
+ && "CPSR Liveness analysis does not match cached live-in result.");
+
+ // Now: find and sandbox stores.
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) {
+ bool CPSRLive =
+ (InstrsWhereCPSRLives.find(&MI) != InstrsWhereCPSRLives.end());
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, true);
+ Modified = true;
+ }
+ if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) {
+ bool CPSRLive =
+ (InstrsWhereCPSRLives.find(&MI) != InstrsWhereCPSRLives.end());
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, false);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+/**********************************************************************/
+
+bool ARMNaClRewritePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ TRI = MF.getTarget().getRegisterInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+
+ if (MBB.hasAddressTaken()) {
+      // FIXME: use symbolic constant or get this value from some configuration
+ MBB.setAlignment(4);
+ Modified = true;
+ }
+
+ if (FlagSfiLoad || FlagSfiStore)
+ Modified |= SandboxMemoryReferencesInBlock(MBB);
+ if (FlagSfiBranch) Modified |= SandboxBranchesInBlock(MBB);
+ if (FlagSfiStack) Modified |= SandboxStackChangesInBlock(MBB);
+ }
+ DEBUG(LightweightVerify(MF));
+ return Modified;
+}
+
+/// createARMNaClRewritePass - returns an instance of the ARMNaClRewritePass.
+FunctionPass *llvm::createARMNaClRewritePass() {
+ return new ARMNaClRewritePass();
+}
diff --git a/lib/Target/ARM/ARMNaClRewritePass.h b/lib/Target/ARM/ARMNaClRewritePass.h
new file mode 100644
index 0000000000..c8854a54fc
--- /dev/null
+++ b/lib/Target/ARM/ARMNaClRewritePass.h
@@ -0,0 +1,36 @@
+//===-- ARMNaClRewritePass.h - NaCl Sandboxing Pass ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARMNACLREWRITEPASS_H
+#define TARGET_ARMNACLREWRITEPASS_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ extern cl::opt<bool> FlagSfiZeroMask;
+ extern cl::opt<bool> FlagSfiData;
+ extern cl::opt<bool> FlagSfiLoad;
+ extern cl::opt<bool> FlagSfiStore;
+ extern cl::opt<bool> FlagSfiStack;
+ extern cl::opt<bool> FlagSfiBranch;
+}
+
+namespace ARM_SFI {
+
+bool IsStackChange(const llvm::MachineInstr &MI,
+ const llvm::TargetRegisterInfo *TRI);
+bool IsSandboxedStackChange(const llvm::MachineInstr &MI);
+bool NeedSandboxStackChange(const llvm::MachineInstr &MI,
+ const llvm::TargetRegisterInfo *TRI);
+
+} // namespace ARM_SFI
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
new file mode 100644
index 0000000000..ce68d4d92b
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
@@ -0,0 +1,329 @@
+//===-- ARMMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-mc-nacl"
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace llvm {
+ cl::opt<bool> FlagSfiZeroMask("sfi-zero-mask");
+}
+
+/// Two helper functions for emitting the actual guard instructions
+
+static void EmitBICMask(MCStreamer &Out,
+ unsigned Addr, int64_t Pred, unsigned Mask) {
+ // bic\Pred \Addr, \Addr, #Mask
+ MCInst BICInst;
+ BICInst.setOpcode(ARM::BICri);
+ BICInst.addOperand(MCOperand::CreateReg(Addr)); // rD
+ BICInst.addOperand(MCOperand::CreateReg(Addr)); // rS
+ if (FlagSfiZeroMask) {
+ BICInst.addOperand(MCOperand::CreateImm(0)); // imm
+ } else {
+ BICInst.addOperand(MCOperand::CreateImm(Mask)); // imm
+ }
+ BICInst.addOperand(MCOperand::CreateImm(Pred)); // predicate
+ BICInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); // CPSR
+ BICInst.addOperand(MCOperand::CreateReg(0)); // flag out
+ Out.EmitInstruction(BICInst);
+}
+
+static void EmitTST(MCStreamer &Out, unsigned Reg) {
+ // tst \reg, #\MASK typically 0xc0000000
+ const unsigned Mask = 0xC0000000;
+ MCInst TSTInst;
+ TSTInst.setOpcode(ARM::TSTri);
+ TSTInst.addOperand(MCOperand::CreateReg(Reg)); // rS
+ if (FlagSfiZeroMask) {
+ TSTInst.addOperand(MCOperand::CreateImm(0)); // imm
+ } else {
+ TSTInst.addOperand(MCOperand::CreateImm(Mask)); // imm
+ }
+  TSTInst.addOperand(MCOperand::CreateImm((int64_t)ARMCC::AL)); // always
+  TSTInst.addOperand(MCOperand::CreateReg(0)); // predicate source register
+  Out.EmitInstruction(TSTInst);
+}
+
+
+// This is ONLY used for sandboxing stack changes.
+// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that
+// it must ensure that the two instructions are in the same bundle.
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always
+// emitted in conjunction with an SFI_DATA_MASK.
+//
+static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) {
+ assert(I == 3 &&
+ (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) &&
+ (ARM::SFI_DATA_MASK == Saved[2].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+
+ unsigned Addr = Saved[2].getOperand(0).getReg();
+ int64_t Pred = Saved[2].getOperand(2).getImm();
+ assert((ARM::SP == Addr) && "Unexpected register at stack guard");
+
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ EmitBICMask(Out, Addr, Pred, 0xC0000000);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitDirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_call_preamble cond=
+ // sfi_nops_to_force_slot3
+ assert(I == 2 && (ARM::SFI_GUARD_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
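+// The call is pushed into the last slot of its bundle so that the return
+// address (the next instruction) falls on a bundle boundary, which the
+// sandboxed return sequence relies on.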
+
+static void EmitIndirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_indirect_call_preamble link cond=
+ // sfi_nops_to_force_slot2
+ // sfi_code_mask \link \cond
+ assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ int64_t Pred = Saved[0].getOperand(2).getImm();
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ EmitBICMask(Out, Reg, Pred, 0xC000000F);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_indirect_jump_preamble link cond=
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \link \cond
+ assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ int64_t Pred = Saved[0].getOperand(2).getImm();
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, Reg, Pred, 0xC000000F);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_return_preamble reg cond=
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \reg \cond
+ assert(I == 2 && (ARM::SFI_GUARD_RETURN == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN");
+ int64_t Pred = Saved[0].getOperand(0).getImm();
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, ARM::LR, Pred, 0xC000000F);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) {
+  // sfi_load_store_preamble reg cond ---->
+ // sfi_nop_if_at_bundle_end
+ // sfi_data_mask \reg, \cond
+ assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ int64_t Pred = Saved[0].getOperand(2).getImm();
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, Reg, Pred, 0xC0000000);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardLoadOrStoreTst(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_cstore_preamble reg -->
+ // sfi_nop_if_at_bundle_end
+ // sfi_data_tst \reg
+ assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE_TST == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+
+ Out.EmitBundleLock();
+ EmitTST(Out, Reg);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+// This is ONLY used for loads into the stack pointer.
+static void EmitGuardSpLoad(int I, MCInst Saved[], MCStreamer &Out) {
+ assert(I == 4 &&
+ (ARM::SFI_GUARD_SP_LOAD == Saved[0].getOpcode()) &&
+ (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[1].getOpcode()) &&
+ (ARM::SFI_DATA_MASK == Saved[3].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+
+ unsigned AddrReg = Saved[0].getOperand(0).getReg();
+ unsigned SpReg = Saved[3].getOperand(0).getReg();
+ int64_t Pred = Saved[3].getOperand(2).getImm();
+ assert((ARM::SP == SpReg) && "Unexpected register at stack guard");
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, AddrReg, Pred, 0xC0000000);
+ Out.EmitInstruction(Saved[2]);
+ EmitBICMask(Out, SpReg, Pred, 0xC0000000);
+ Out.EmitBundleUnlock();
+}
+
+namespace llvm {
+// CustomExpandInstNaClARM -
+// If Inst is a NaCl pseudo instruction, emits the substitute
+// expansion to the MCStreamer and returns true.
+// Otherwise, returns false.
+//
+// NOTE: Each time this function calls Out.EmitInstruction(), it will be
+// called again recursively to rewrite the new instruction being emitted.
+// Care must be taken to ensure that this does not result in an infinite
+// loop. Also, global state must be managed carefully so that it is
+// consistent during recursive calls.
+//
+// We need static state to buffer the SFI_* pseudo and the instructions
+// that follow it, so that the whole sequence can be emitted as a single
+// bundle-locked unit.
+
+
+bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out) {
+ const int MaxSaved = 4;
+ static MCInst Saved[MaxSaved];
+ static int SaveCount = 0;
+ static int I = 0;
+  // This routine only executes if RecurseGuard is false.
+ static bool RecurseGuard = false;
+
+ // If we are emitting to .s, just emit all pseudo-instructions directly.
+ if (Out.hasRawTextSupport()) {
+ return false;
+ }
+
+  // No recursive calls allowed.
+ if (RecurseGuard) return false;
+
+ unsigned Opc = Inst.getOpcode();
+
+ DEBUG(dbgs() << "CustomExpandInstNaClARM("; Inst.dump(); dbgs() << ")\n");
+
+ // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of
+ // a stack guard in conjunction with a SFI_DATA_MASK
+
+ // Logic:
+ // This is somewhat convoluted, but in the current model, the SFI
+ // guard pseudo instructions occur PRIOR to the actual instruction.
+ // So, the bundling/alignment operation has to refer to the FOLLOWING
+ // one or two instructions.
+ //
+ // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_*
+ // pseudo and the very next one or two instructions are used as arguments to
+ // the Emit*() functions in this file. This is the reason why we have a
+  // doubly nested switch here: first to save the SFI_* pseudo, then to
+  // emit it and the following instruction(s).
+
+  // Depending on the pseudo, we need to save between two and four
+  // instructions.
+
+ if ((I == 0) && (SaveCount == 0)) {
+ // Base State, no saved instructions.
+ // If the current instruction is a SFI instruction, set the SaveCount
+ // and fall through.
+ switch (Opc) {
+ default:
+ SaveCount = 0; // Nothing to do.
+ return false; // Handle this Inst elsewhere.
+ case ARM::SFI_NOP_IF_AT_BUNDLE_END:
+ SaveCount = 3;
+ break;
+ case ARM::SFI_DATA_MASK:
+ SaveCount = 0; // Do nothing.
+ break;
+ case ARM::SFI_GUARD_CALL:
+ case ARM::SFI_GUARD_INDIRECT_CALL:
+ case ARM::SFI_GUARD_INDIRECT_JMP:
+ case ARM::SFI_GUARD_RETURN:
+ case ARM::SFI_GUARD_LOADSTORE:
+ case ARM::SFI_GUARD_LOADSTORE_TST:
+ SaveCount = 2;
+ break;
+ case ARM::SFI_GUARD_SP_LOAD:
+ SaveCount = 4;
+ break;
+ }
+ }
+
+ if (I < SaveCount) {
+    // Otherwise, save the current Inst and return.
+ Saved[I++] = Inst;
+ if (I < SaveCount)
+ return true;
+    // Else fall through to the next state.
+ }
+
+ if (SaveCount > 0) {
+    assert(I == SaveCount && "Bookkeeping Error");
+ SaveCount = 0; // Reset for next iteration
+ // The following calls may call Out.EmitInstruction()
+ // which must not again call CustomExpandInst ...
+    // So set RecurseGuard = true.
+ RecurseGuard = true;
+
+ switch (Saved[0].getOpcode()) {
+ default: /* No action required */ break;
+ case ARM::SFI_NOP_IF_AT_BUNDLE_END:
+ EmitDataMask(I, Saved, Out);
+ break;
+ case ARM::SFI_DATA_MASK:
+ assert(0 && "Unexpected NOP_IF_AT_BUNDLE_END as a Saved Inst");
+ break;
+ case ARM::SFI_GUARD_CALL:
+ EmitDirectGuardCall(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_INDIRECT_CALL:
+ EmitIndirectGuardCall(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_INDIRECT_JMP:
+ EmitIndirectGuardJmp(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_RETURN:
+ EmitGuardReturn(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_LOADSTORE:
+ EmitGuardLoadOrStore(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_LOADSTORE_TST:
+ EmitGuardLoadOrStoreTst(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_SP_LOAD:
+ EmitGuardSpLoad(I, Saved, Out);
+ break;
+ }
+ I = 0; // Reset I for next.
+ assert(RecurseGuard && "Illegal Depth");
+ RecurseGuard = false;
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h
new file mode 100644
index 0000000000..de7ed50662
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h
@@ -0,0 +1,19 @@
+//===-- ARMMCNaCl.h - Prototype for CustomExpandInstNaClARM ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCNACL_H
+#define ARMMCNACL_H
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+ bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out);
+}
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
new file mode 100644
index 0000000000..d39a60d41c
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
@@ -0,0 +1,261 @@
+//=== MipsMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-mc-nacl"
+
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// Helper function for emitting the actual mask instruction
+
+static void EmitMask(MCStreamer &Out,
+ unsigned Addr, unsigned Mask) {
+ // and \Addr, \Addr, \Mask
+ MCInst MaskInst;
+ MaskInst.setOpcode(Mips::AND);
+ MaskInst.addOperand(MCOperand::CreateReg(Addr));
+ MaskInst.addOperand(MCOperand::CreateReg(Addr));
+ MaskInst.addOperand(MCOperand::CreateReg(Mask));
+ Out.EmitInstruction(MaskInst);
+}
+
+// This is ONLY used for sandboxing stack changes.
+// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that
+// it must ensure that the two instructions are in the same bundle.
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always
+// emitted in conjunction with a SFI_DATA_MASK
+//
+static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) {
+ assert(I == 3 &&
+ (Mips::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) &&
+ (Mips::SFI_DATA_MASK == Saved[2].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+
+ unsigned Addr = Saved[2].getOperand(0).getReg();
+ unsigned Mask = Saved[2].getOperand(2).getReg();
+ assert((Mips::SP == Addr) && "Unexpected register at stack guard");
+
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ EmitMask(Out, Addr, Mask);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitDirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_call_preamble --->
+ // sfi_nops_to_force_slot2
+ assert(I == 3 && (Mips::SFI_GUARD_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitInstruction(Saved[2]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_indirect_call_preamble link --->
+ // sfi_nops_to_force_slot1
+ // sfi_code_mask \link \link \maskreg
+ assert(I == 3 && (Mips::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_CALL");
+
+ unsigned Addr = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ EmitMask(Out, Addr, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitInstruction(Saved[2]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_indirect_jump_preamble link --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \link \link \maskreg
+ assert(I == 2 && (Mips::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_JMP");
+ unsigned Addr = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleLock();
+ EmitMask(Out, Addr, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_return_preamble reg --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \reg \reg \maskreg
+ assert(I == 2 && (Mips::SFI_GUARD_RETURN == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleLock();
+ EmitMask(Out, Reg, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_load_store_preamble reg --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_data_mask \reg \reg \maskreg
+ assert(I == 2 && (Mips::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_LOADSTORE");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleLock();
+ EmitMask(Out, Reg, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+namespace llvm {
+// CustomExpandInstNaClMips -
+// If Inst is a NaCl pseudo instruction, emits the substitute
+// expansion to the MCStreamer and returns true.
+// Otherwise, returns false.
+//
+// NOTE: Each time this function calls Out.EmitInstruction(), it will be
+// called again recursively to rewrite the new instruction being emitted.
+// Care must be taken to ensure that this does not result in an infinite
+// loop. Also, global state must be managed carefully so that it is
+// consistent during recursive calls.
+//
+// We need static state to buffer the SFI_* pseudo and the instructions
+// that follow it, so that the whole sequence can be emitted as a single
+// bundle-locked unit.
+
+
+bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out) {
+ const int MaxSaved = 4;
+ static MCInst Saved[MaxSaved];
+ static int SaveCount = 0;
+ static int I = 0;
+  // This routine only executes if RecurseGuard is false.
+ static bool RecurseGuard = false;
+
+ // If we are emitting to .s, just emit all pseudo-instructions directly.
+ if (Out.hasRawTextSupport()) {
+ return false;
+ }
+
+  // No recursive calls allowed.
+ if (RecurseGuard) return false;
+
+ unsigned Opc = Inst.getOpcode();
+
+ DEBUG(dbgs() << "CustomExpandInstNaClMips("; Inst.dump(); dbgs() << ")\n");
+
+ // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of
+ // a stack guard in conjunction with a SFI_DATA_MASK
+
+ // Logic:
+ // This is somewhat convoluted, but in the current model, the SFI
+ // guard pseudo instructions occur PRIOR to the actual instruction.
+ // So, the bundling/alignment operation has to refer to the FOLLOWING
+ // one or two instructions.
+ //
+ // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_*
+ // pseudo and the very next one or two instructions are used as arguments to
+ // the Emit*() functions in this file. This is the reason why we have a
+  // doubly nested switch here: first to save the SFI_* pseudo, then to
+  // emit it and the following instruction(s).
+
+  // Depending on the pseudo, we need to save two or three instructions.
+
+ if ((I == 0) && (SaveCount == 0)) {
+ // Base State, no saved instructions.
+ // If the current instruction is a SFI instruction, set the SaveCount
+ // and fall through.
+ switch (Opc) {
+ default:
+ SaveCount = 0; // Nothing to do.
+ return false; // Handle this Inst elsewhere.
+ case Mips::SFI_NOP_IF_AT_BUNDLE_END:
+ case Mips::SFI_GUARD_CALL:
+ case Mips::SFI_GUARD_INDIRECT_CALL:
+ SaveCount = 3;
+ break;
+ case Mips::SFI_DATA_MASK:
+ SaveCount = 0; // Do nothing.
+ break;
+ case Mips::SFI_GUARD_INDIRECT_JMP:
+ case Mips::SFI_GUARD_RETURN:
+ case Mips::SFI_GUARD_LOADSTORE:
+ SaveCount = 2;
+ break;
+ }
+ }
+
+ if (I < SaveCount) {
+    // Otherwise, save the current Inst and return.
+ Saved[I++] = Inst;
+ if (I < SaveCount)
+ return true;
+    // Else fall through to the next state.
+ }
+
+ if (SaveCount > 0) {
+    assert(I == SaveCount && "Bookkeeping Error");
+ SaveCount = 0; // Reset for next iteration
+ // The following calls may call Out.EmitInstruction()
+ // which must not again call CustomExpandInst ...
+    // So set RecurseGuard = true.
+ RecurseGuard = true;
+
+ switch (Saved[0].getOpcode()) {
+ default: /* No action required */ break;
+ case Mips::SFI_NOP_IF_AT_BUNDLE_END:
+ EmitDataMask(I, Saved, Out);
+ break;
+ case Mips::SFI_DATA_MASK:
+ assert(0 && "Unexpected NOP_IF_AT_BUNDLE_END as a Saved Inst");
+ break;
+ case Mips::SFI_GUARD_CALL:
+ EmitDirectGuardCall(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_INDIRECT_CALL:
+ EmitIndirectGuardCall(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_INDIRECT_JMP:
+ EmitIndirectGuardJmp(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_RETURN:
+ EmitGuardReturn(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_LOADSTORE:
+ EmitGuardLoadOrStore(I, Saved, Out);
+ break;
+ }
+ I = 0; // Reset I for next.
+ assert(RecurseGuard && "Illegal Depth");
+ RecurseGuard = false;
+ return true;
+ }
+ return false;
+}
+
+} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
new file mode 100644
index 0000000000..c90502ec33
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -0,0 +1,19 @@
+//===-- MipsMCNaCl.h - Prototype for CustomExpandInstNaClMips ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCNACL_H
+#define MIPSMCNACL_H
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+ bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out);
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsNaClHeaders.cpp b/lib/Target/Mips/MipsNaClHeaders.cpp
new file mode 100644
index 0000000000..375c287d67
--- /dev/null
+++ b/lib/Target/Mips/MipsNaClHeaders.cpp
@@ -0,0 +1,128 @@
+//===-- MipsNaClHeaders.cpp - Print SFI headers to an Mips .s file --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the initial header string needed
+// for the Native Client target in Mips assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "MipsNaClRewritePass.h"
+#include <string>
+
+using namespace llvm;
+
+void EmitMipsSFIHeaders(raw_ostream &O) {
+ O << " # ========================================\n";
+ O << "# Branch: " << FlagSfiBranch << "\n";
+ O << "# Stack: " << FlagSfiStack << "\n";
+ O << "# Store: " << FlagSfiStore << "\n";
+ O << "# Load: " << FlagSfiLoad << "\n";
+
+ O << " # ========================================\n";
+  // NOTE: this macro does bundle alignment as follows:
+  // if the current bundle position is X, emit \pX data items of value \val
+  // NOTE: pos will be one of 0, 4, 8, 12
+ //
+ O <<
+ "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n"
+ "\t.set pos, (. - XmagicX) % 16\n"
+ "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nop_if_at_bundle_end\n"
+ "\tsfi_long_based_on_pos 0 0 0 1 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot3\n"
+ "\tsfi_long_based_on_pos 3 2 1 0 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot2\n"
+ "\tsfi_long_based_on_pos 2 1 0 3 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot1\n"
+ "\tsfi_long_based_on_pos 1 0 3 2 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O << " # ========================================\n";
+ O <<
+ "\t.macro sfi_data_mask reg1 reg2 maskreg\n"
+ "\tand \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_code_mask reg1 reg2 maskreg\n"
+ "\tand \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O << " # ========================================\n";
+ if (FlagSfiBranch) {
+ O <<
+ "\t.macro sfi_call_preamble\n"
+ "\tsfi_nops_to_force_slot2\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_return_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ // This is used just before "jr"
+ O <<
+ "\t.macro sfi_indirect_jump_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ // This is used just before "jalr"
+ O <<
+ "\t.macro sfi_indirect_call_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nops_to_force_slot1\n"
+ "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ }
+
+ if (FlagSfiStore) {
+ O << " # ========================================\n";
+
+ O <<
+ "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_data_mask \\reg1, \\reg2 , \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+ } else {
+ O <<
+ "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+ }
+
+ O << " # ========================================\n";
+ O << "\t.text\n";
+}
diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp
new file mode 100644
index 0000000000..cce770eebd
--- /dev/null
+++ b/lib/Target/Mips/MipsNaClRewritePass.cpp
@@ -0,0 +1,333 @@
+//===-- MipsNaClRewritePass.cpp - Native Client Rewrite Pass -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Native Client Rewrite Pass
+// This final pass inserts the sandboxing instructions needed to run inside
+// the Native Client sandbox. Native Client requires certain software fault
+// isolation (SFI) constructions to be put in place, to prevent escape from
+// the sandbox. Native Client refuses to execute binaries without the correct
+// SFI sequences.
+//
+// Potentially dangerous operations which are protected include:
+// * Stores
+// * Branches
+// * Changes to SP
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-sfi"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsNaClRewritePass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+unsigned Mips::IndirectBranchMaskReg = Mips::T6;
+unsigned Mips::LoadStoreStackMaskReg = Mips::T7;
+
+namespace {
+ class MipsNaClRewritePass : public MachineFunctionPass {
+ public:
+ static char ID;
+ MipsNaClRewritePass() : MachineFunctionPass(ID) {}
+
+ const MipsInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Mips Native Client Rewrite Pass";
+ }
+
+ private:
+
+ bool SandboxLoadsInBlock(MachineBasicBlock &MBB);
+ bool SandboxStoresInBlock(MachineBasicBlock &MBB);
+ void SandboxLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx);
+
+ bool SandboxBranchesInBlock(MachineBasicBlock &MBB);
+ bool SandboxStackChangesInBlock(MachineBasicBlock &MBB);
+
+ void SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ void AlignAllJumpTargets(MachineFunction &MF);
+ };
+ char MipsNaClRewritePass::ID = 0;
+}
+
+static bool IsReturn(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::RET);
+}
+
+static bool IsIndirectJump(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::JR);
+}
+
+static bool IsIndirectCall(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::JALR);
+}
+
+static bool IsDirectCall(const MachineInstr &MI) {
+ return ((MI.getOpcode() == Mips::JAL) || (MI.getOpcode() == Mips::BGEZAL)
+ || (MI.getOpcode() == Mips::BLTZAL));
+}
+
+static bool IsStackMask(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::SFI_DATA_MASK);
+}
+
+static bool NeedSandboxStackChange(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ if (IsDirectCall(MI) || IsIndirectCall(MI)) {
+ // We check this first because method modifiesRegister
+ // returns true for calls.
+ return false;
+ }
+ return (MI.modifiesRegister(Mips::SP, TRI) && !IsStackMask(MI));
+}
+
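+// Wrap an SP-modifying instruction: emit SFI_NOP_IF_AT_BUNDLE_END before
+// it and SFI_DATA_MASK after it, so that the later MC lowering can bundle
+// the instruction together with the mask that re-sandboxes SP.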
+void MipsNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_NOP_IF_AT_BUNDLE_END));
+
+  // Advance the iterator to just past MI so the mask can be inserted
+  // immediately after it (the first post-increment yields MI itself, the
+  // second the position one past it).
+ MachineBasicBlock::iterator MBBINext = (MBBI++);
+ MachineBasicBlock::iterator MBBINext2 = (MBBI++);
+
+ BuildMI(MBB, MBBINext2, MI.getDebugLoc(),
+ TII->get(Mips::SFI_DATA_MASK), Mips::SP)
+ .addReg(Mips::SP)
+ .addReg(Mips::LoadStoreStackMaskReg);
+ return;
+}
+
+bool MipsNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ if (NeedSandboxStackChange(MI, TRI)) {
+ SandboxStackChange(MBB, MBBI);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+bool MipsNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ if (IsReturn(MI)) {
+ unsigned AddrReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_RETURN), AddrReg)
+ .addReg(AddrReg)
+ .addReg(Mips::IndirectBranchMaskReg);
+ Modified = true;
+ } else if (IsIndirectJump(MI)) {
+ unsigned AddrReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_INDIRECT_JMP), AddrReg)
+ .addReg(AddrReg)
+ .addReg(Mips::IndirectBranchMaskReg);
+ Modified = true;
+ } else if (IsDirectCall(MI)) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_CALL));
+ Modified = true;
+ } else if (IsIndirectCall(MI)) {
+ unsigned AddrReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_INDIRECT_CALL), AddrReg)
+ .addReg(AddrReg)
+ .addReg(Mips::IndirectBranchMaskReg);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+/*
+ * Sandboxes a load or store instruction by inserting an appropriate mask
+ * operation before it.
+ */
+void MipsNaClRewritePass::SandboxLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx) {
+ unsigned BaseReg = MI.getOperand(AddrIdx).getReg();
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_LOADSTORE), BaseReg)
+ .addReg(BaseReg)
+ .addReg(Mips::LoadStoreStackMaskReg);
+ return;
+}
+
+static bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 1
+ case Mips::LB:
+ case Mips::LBu:
+ case Mips::LH:
+ case Mips::LHu:
+ case Mips::LW:
+ case Mips::LWC1:
+ case Mips::LDC1:
+ case Mips::LL:
+ case Mips::LWL:
+ case Mips::LWR:
+ *AddrIdx = 1;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == Mips::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 1
+ case Mips::SB:
+ case Mips::SH:
+ case Mips::SW:
+ case Mips::SWC1:
+ case Mips::SDC1:
+ case Mips::SWL:
+ case Mips::SWR:
+ *AddrIdx = 1;
+ break;
+
+ case Mips::SC:
+ *AddrIdx = 2;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == Mips::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+bool MipsNaClRewritePass::SandboxLoadsInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (IsDangerousLoad(MI, &AddrIdx)) {
+ SandboxLoadStore(MBB, MBBI, MI, AddrIdx);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+bool MipsNaClRewritePass::SandboxStoresInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (IsDangerousStore(MI, &AddrIdx)) {
+ SandboxLoadStore(MBB, MBBI, MI, AddrIdx);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+// Make sure all jump targets are aligned to a bundle boundary. Note that
+// setAlignment takes a log2 value, so 4 means 16-byte alignment.
+void MipsNaClRewritePass::AlignAllJumpTargets(MachineFunction &MF) {
+ // JUMP TABLE TARGETS
+ MachineJumpTableInfo *jt_info = MF.getJumpTableInfo();
+ if (jt_info) {
+ const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables();
+ for (unsigned i=0; i < JT.size(); ++i) {
+ std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs;
+
+ for (unsigned j=0; j < MBBs.size(); ++j) {
+ MBBs[j]->setAlignment(4);
+ }
+ }
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+ if (MBB.hasAddressTaken())
+ MBB.setAlignment(4);
+ }
+}
+
+bool MipsNaClRewritePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ TRI = MF.getTarget().getRegisterInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+
+ if (FlagSfiLoad)
+ Modified |= SandboxLoadsInBlock(MBB);
+ if (FlagSfiStore)
+ Modified |= SandboxStoresInBlock(MBB);
+ if (FlagSfiBranch)
+ Modified |= SandboxBranchesInBlock(MBB);
+ if (FlagSfiStack)
+ Modified |= SandboxStackChangesInBlock(MBB);
+ }
+
+ if (FlagSfiBranch)
+ AlignAllJumpTargets(MF);
+
+ return Modified;
+}
+
+/// createMipsNaClRewritePass - returns an instance of the Mips NaCl rewrite pass.
+FunctionPass *llvm::createMipsNaClRewritePass() {
+ return new MipsNaClRewritePass();
+}
diff --git a/lib/Target/Mips/MipsNaClRewritePass.h b/lib/Target/Mips/MipsNaClRewritePass.h
new file mode 100644
index 0000000000..4e729ec985
--- /dev/null
+++ b/lib/Target/Mips/MipsNaClRewritePass.h
@@ -0,0 +1,21 @@
+//===-- MipsNaClRewritePass.h - NaCl Sandboxing Pass ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPSNACLREWRITEPASS_H
+#define TARGET_MIPSNACLREWRITEPASS_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ extern cl::opt<bool> FlagSfiLoad;
+ extern cl::opt<bool> FlagSfiStore;
+ extern cl::opt<bool> FlagSfiStack;
+ extern cl::opt<bool> FlagSfiBranch;
+}
+
+#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
new file mode 100644
index 0000000000..6b42feee68
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
@@ -0,0 +1,803 @@
+//=== X86MCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "x86-sandboxing"
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCNaCl.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+// This option makes it possible to override the x86 jmp mask immediate.
+// The default of -32 clears the low five bits of the branch target, forcing
+// it to a 32-byte bundle boundary. Setting it to -1 effectively turns
+// masking into a nop, which helps when linking this code with
+// non-sandboxed libs (at least for x86-32).
+cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", cl::init(-32));
+
+static unsigned PrefixSaved = 0;
+static bool PrefixPass = false;
+
+// See the note below where this function is defined.
+namespace llvm {
+unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High=false);
+}
+
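+// Expand a direct naclcall: the call is bundle-locked and aligned to end
+// exactly at a bundle boundary, so the return address is bundle-aligned
+// as the validator requires.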
+static void EmitDirectCall(const MCOperand &Op, bool Is64Bit,
+ MCStreamer &Out) {
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+
+ MCInst CALLInst;
+ CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
+ CALLInst.addOperand(Op);
+ Out.EmitInstruction(CALLInst);
+ Out.EmitBundleUnlock();
+}
+
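+// Expand a sandboxed indirect call or jump. With the default -32 mask,
+// a 64-bit call through %eax becomes, roughly:
+//   andl $-32, %eax   ; clamp the target to a bundle boundary
+//   addq %r15, %rax   ; rebase into the sandbox (x86-64 only)
+//   call *%rax
+// all within one locked bundle (aligned to bundle end for calls).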
+static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall,
+ MCStreamer &Out) {
+ const int JmpMask = FlagSfiX86JmpMask;
+ const unsigned Reg32 = Op.getReg();
+ const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
+
+ if (IsCall)
+ Out.EmitBundleAlignEnd();
+
+ Out.EmitBundleLock();
+
+ MCInst ANDInst;
+ ANDInst.setOpcode(X86::AND32ri8);
+ ANDInst.addOperand(MCOperand::CreateReg(Reg32));
+ ANDInst.addOperand(MCOperand::CreateReg(Reg32));
+ ANDInst.addOperand(MCOperand::CreateImm(JmpMask));
+ Out.EmitInstruction(ANDInst);
+
+ if (Is64Bit) {
+ MCInst InstADD;
+ InstADD.setOpcode(X86::ADD64rr);
+ InstADD.addOperand(MCOperand::CreateReg(Reg64));
+ InstADD.addOperand(MCOperand::CreateReg(Reg64));
+ InstADD.addOperand(MCOperand::CreateReg(X86::R15));
+ Out.EmitInstruction(InstADD);
+ }
+
+ if (IsCall) {
+ MCInst CALLInst;
+ CALLInst.setOpcode(Is64Bit ? X86::CALL64r : X86::CALL32r);
+ CALLInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32));
+ Out.EmitInstruction(CALLInst);
+ } else {
+ MCInst JMPInst;
+ JMPInst.setOpcode(Is64Bit ? X86::JMP64r : X86::JMP32r);
+ JMPInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32));
+ Out.EmitInstruction(JMPInst);
+ }
+ Out.EmitBundleUnlock();
+}
+
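+// Expand naclret: a plain "ret" would branch through an unvalidated
+// address on the stack, so instead pop the return address into
+// %ecx/%rcx and use a sandboxed indirect jump:
+//   pop %ecx
+//   add $amt, %esp    ; only for naclreti
+//   nacljmp %ecx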
+static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) {
+ MCInst POPInst;
+ POPInst.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r);
+ POPInst.addOperand(MCOperand::CreateReg(Is64Bit ? X86::RCX : X86::ECX));
+ Out.EmitInstruction(POPInst);
+
+ if (AmtOp) {
+ assert(!Is64Bit);
+ MCInst ADDInst;
+ unsigned ADDReg = X86::ESP;
+ ADDInst.setOpcode(X86::ADD32ri);
+ ADDInst.addOperand(MCOperand::CreateReg(ADDReg));
+ ADDInst.addOperand(MCOperand::CreateReg(ADDReg));
+ ADDInst.addOperand(*AmtOp);
+ Out.EmitInstruction(ADDInst);
+ }
+
+ MCInst JMPInst;
+ JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r);
+ JMPInst.addOperand(MCOperand::CreateReg(X86::ECX));
+ Out.EmitInstruction(JMPInst);
+}
+
+static void EmitTrap(bool Is64Bit, MCStreamer &Out) {
+ // Rewrite to:
+ // X86-32: mov $0, 0
+ // X86-64: mov $0, (%r15)
+ unsigned BaseReg = Is64Bit ? X86::R15 : 0;
+ MCInst Tmp;
+ Tmp.setOpcode(X86::MOV32mi);
+ Tmp.addOperand(MCOperand::CreateReg(BaseReg)); // BaseReg
+ Tmp.addOperand(MCOperand::CreateImm(1)); // Scale
+ Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Tmp.addOperand(MCOperand::CreateImm(0)); // Offset
+ Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg
+ Tmp.addOperand(MCOperand::CreateImm(0)); // Value
+
+ Out.EmitInstruction(Tmp);
+}
+
+// Fix a register after being truncated to 32-bits.
+static void EmitRegFix(unsigned Reg64, MCStreamer &Out) {
+ // lea (%rsp, %r15, 1), %rsp
+ MCInst Tmp;
+ Tmp.setOpcode(X86::LEA64r);
+ Tmp.addOperand(MCOperand::CreateReg(Reg64)); // DestReg
+ Tmp.addOperand(MCOperand::CreateReg(Reg64)); // BaseReg
+ Tmp.addOperand(MCOperand::CreateImm(1)); // Scale
+ Tmp.addOperand(MCOperand::CreateReg(X86::R15)); // IndexReg
+ Tmp.addOperand(MCOperand::CreateImm(0)); // Offset
+ Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg
+ Out.EmitInstruction(Tmp);
+}
+
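+// Expand naclasp/naclssp/naclandsp: adjust the stack pointer with 32-bit
+// arithmetic, then re-add %r15 (EmitRegFix) inside the same locked bundle
+// so %rsp points back into the sandbox before it can be used.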
+static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp,
+ MCStreamer &Out) {
+ Out.EmitBundleLock();
+
+ MCInst Tmp;
+ Tmp.setOpcode(Opc);
+ Tmp.addOperand(MCOperand::CreateReg(X86::RSP));
+ Tmp.addOperand(MCOperand::CreateReg(X86::RSP));
+ Tmp.addOperand(ImmOp);
+ Out.EmitInstruction(Tmp);
+
+ EmitRegFix(X86::RSP, Out);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitSPAdj(const MCOperand &ImmOp, MCStreamer &Out) {
+ Out.EmitBundleLock();
+
+ MCInst Tmp;
+ Tmp.setOpcode(X86::LEA64_32r);
+ Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); // DestReg
+ Tmp.addOperand(MCOperand::CreateReg(X86::RBP)); // BaseReg
+ Tmp.addOperand(MCOperand::CreateImm(1)); // Scale
+ Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Tmp.addOperand(ImmOp); // Offset
+ Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg
+ Out.EmitInstruction(Tmp);
+
+ EmitRegFix(X86::RSP, Out);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitPrefix(unsigned Opc, MCStreamer &Out) {
+ assert(PrefixSaved == 0);
+ assert(PrefixPass == false);
+
+ MCInst PrefixInst;
+ PrefixInst.setOpcode(Opc);
+ PrefixPass = true;
+ Out.EmitInstruction(PrefixInst);
+
+ assert(PrefixSaved == 0);
+ assert(PrefixPass == false);
+}
+
+static void EmitMoveRegReg(bool Is64Bit, unsigned ToReg,
+ unsigned FromReg, MCStreamer &Out) {
+ MCInst Move;
+ Move.setOpcode(Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ Move.addOperand(MCOperand::CreateReg(ToReg));
+ Move.addOperand(MCOperand::CreateReg(FromReg));
+ Out.EmitInstruction(Move);
+}
+
+static void EmitMoveRegImm32(bool Is64Bit, unsigned ToReg,
+                             unsigned Imm32, MCStreamer &Out) {
+  // A 32-bit immediate move suffices even for 64-bit targets: on x86-64
+  // it zero-extends into the full register.
+  MCInst MovInst;
+  MovInst.setOpcode(X86::MOV32ri);
+  MovInst.addOperand(MCOperand::CreateReg(ToReg));
+  MovInst.addOperand(MCOperand::CreateImm(Imm32));
+  Out.EmitInstruction(MovInst);
+}
+
+static void EmitCmove(bool Is64Bit, unsigned ToReg,
+ unsigned FromReg, MCStreamer &Out) {
+ MCInst CmovInst;
+ CmovInst.setOpcode(Is64Bit ? X86::CMOVE64rr : X86::CMOVE32rr);
+ CmovInst.addOperand(MCOperand::CreateReg(ToReg));
+ CmovInst.addOperand(MCOperand::CreateReg(ToReg));
+ CmovInst.addOperand(MCOperand::CreateReg(FromReg));
+ Out.EmitInstruction(CmovInst);
+}
+
+static void EmitClearReg(bool Is64Bit, unsigned Reg, MCStreamer &Out) {
+ MCInst Clear;
+ Clear.setOpcode(X86::XOR32rr);
+ Clear.addOperand(MCOperand::CreateReg(Reg));
+ Clear.addOperand(MCOperand::CreateReg(Reg));
+ Clear.addOperand(MCOperand::CreateReg(Reg));
+ Out.EmitInstruction(Clear);
+}
+
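+// Truncate a 64-bit register to its low 32 bits. On x86-64 a 32-bit
+// register-to-register move zeroes the upper 32 bits, so moving the
+// 32-bit subregister onto itself is sufficient.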
+static void EmitRegTruncate(unsigned Reg64, MCStreamer &Out) {
+ unsigned Reg32 = getX86SubSuperRegister_(Reg64, MVT::i32);
+ EmitMoveRegReg(false, Reg32, Reg32, Out);
+}
+
+static void EmitPushReg(bool Is64Bit, unsigned FromReg, MCStreamer &Out) {
+ MCInst Push;
+ Push.setOpcode(Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+ Push.addOperand(MCOperand::CreateReg(FromReg));
+ Out.EmitInstruction(Push);
+}
+
+static void EmitPopReg(bool Is64Bit, unsigned ToReg, MCStreamer &Out) {
+ MCInst Pop;
+ Pop.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r);
+ Pop.addOperand(MCOperand::CreateReg(ToReg));
+ Out.EmitInstruction(Pop);
+}
+
+static void EmitLoad(bool Is64Bit,
+ unsigned DestReg,
+ unsigned BaseReg,
+ unsigned Scale,
+ unsigned IndexReg,
+ unsigned Offset,
+ unsigned SegmentReg,
+ MCStreamer &Out) {
+ // Load DestReg from address BaseReg + Scale * IndexReg + Offset
+ MCInst Load;
+ Load.setOpcode(Is64Bit ? X86::MOV64rm : X86::MOV32rm);
+ Load.addOperand(MCOperand::CreateReg(DestReg));
+ Load.addOperand(MCOperand::CreateReg(BaseReg));
+ Load.addOperand(MCOperand::CreateImm(Scale));
+ Load.addOperand(MCOperand::CreateReg(IndexReg));
+ Load.addOperand(MCOperand::CreateImm(Offset));
+ Load.addOperand(MCOperand::CreateReg(SegmentReg));
+ Out.EmitInstruction(Load);
+}
+
+// Utility function for the stores done by setjmp.
+// Creates a store of SrcReg to the address BaseReg + Scale * IndexReg + Offset.
+static void EmitStore(bool Is64Bit,
+ unsigned BaseReg,
+ unsigned Scale,
+ unsigned IndexReg,
+ unsigned Offset,
+ unsigned SegmentReg,
+ unsigned SrcReg,
+ MCStreamer &Out) {
+ // Store SrcReg to address BaseReg + Scale * IndexReg + Offset
+ MCInst Store;
+ Store.setOpcode(Is64Bit ? X86::MOV64mr : X86::MOV32mr);
+ Store.addOperand(MCOperand::CreateReg(BaseReg));
+ Store.addOperand(MCOperand::CreateImm(Scale));
+ Store.addOperand(MCOperand::CreateReg(IndexReg));
+ Store.addOperand(MCOperand::CreateImm(Offset));
+ Store.addOperand(MCOperand::CreateReg(SegmentReg));
+ Store.addOperand(MCOperand::CreateReg(SrcReg));
+ Out.EmitInstruction(Store);
+}
+
+static void EmitAndRegReg(bool Is64Bit, unsigned DestReg,
+ unsigned SrcReg, MCStreamer &Out) {
+ MCInst AndInst;
+ AndInst.setOpcode(X86::AND32rr);
+ AndInst.addOperand(MCOperand::CreateReg(DestReg));
+ AndInst.addOperand(MCOperand::CreateReg(DestReg));
+ AndInst.addOperand(MCOperand::CreateReg(SrcReg));
+ Out.EmitInstruction(AndInst);
+}
+
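+// Look for the PSEUDO_NACL_SEG marker that the rewrite pass leaves in the
+// segment-register slot of a memory operand. If found, strip it and
+// report the index register that must be truncated to 32 bits before the
+// access.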
+static bool SandboxMemoryRef(MCInst *Inst,
+ unsigned *IndexReg,
+ MCStreamer &Out) {
+ for (unsigned i = 0, last = Inst->getNumOperands(); i < last; i++) {
+ if (!Inst->getOperand(i).isReg() ||
+ Inst->getOperand(i).getReg() != X86::PSEUDO_NACL_SEG) {
+ continue;
+ }
+ // Return the index register that will need to be truncated.
+ // The order of operands on a memory reference is always:
+ // (BaseReg, ScaleImm, IndexReg, DisplacementImm, SegmentReg),
+ // So if we found a match for a segment register value, we know that
+ // the index register is exactly two operands prior.
+ *IndexReg = Inst->getOperand(i - 2).getReg();
+ // Remove the PSEUDO_NACL_SEG annotation.
+ Inst->getOperand(i).setReg(0);
+ return true;
+ }
+ return false;
+}
+
+static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) {
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+
+ MCInst LeaInst;
+ LeaInst.setOpcode(X86::LEA32r);
+ LeaInst.addOperand(MCOperand::CreateReg(X86::EAX)); // DestReg
+ LeaInst.addOperand(Inst.getOperand(0)); // BaseReg
+ LeaInst.addOperand(Inst.getOperand(1)); // Scale
+ LeaInst.addOperand(Inst.getOperand(2)); // IndexReg
+ LeaInst.addOperand(Inst.getOperand(3)); // Offset
+ LeaInst.addOperand(Inst.getOperand(4)); // SegmentReg
+ Out.EmitInstruction(LeaInst);
+
+ MCInst CALLInst;
+ CALLInst.setOpcode(X86::CALLpcrel32);
+ MCContext &context = Out.getContext();
+ const MCSymbolRefExpr *expr =
+ MCSymbolRefExpr::Create(
+ context.GetOrCreateSymbol(StringRef("___tls_get_addr")),
+ MCSymbolRefExpr::VK_PLT, context);
+ CALLInst.addOperand(MCOperand::CreateExpr(expr));
+ Out.EmitInstruction(CALLInst);
+ Out.EmitBundleUnlock();
+}
+
+
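+// Expand naclrestbp/naclrestsp: load or copy a 32-bit value into %ebp or
+// %esp, then re-add %r15 (EmitRegFix) within one locked bundle so the
+// full 64-bit register again points into the sandbox.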
+static void EmitREST(const MCInst &Inst, unsigned Reg32, bool IsMem, MCStreamer &Out) {
+ unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
+ Out.EmitBundleLock();
+ if (!IsMem) {
+ EmitMoveRegReg(false, Reg32, Inst.getOperand(0).getReg(), Out);
+ } else {
+ unsigned IndexReg;
+ MCInst SandboxedInst = Inst;
+ if (SandboxMemoryRef(&SandboxedInst, &IndexReg, Out)) {
+ EmitRegTruncate(IndexReg, Out);
+ }
+ EmitLoad(false,
+ Reg32,
+ SandboxedInst.getOperand(0).getReg(), // BaseReg
+ SandboxedInst.getOperand(1).getImm(), // Scale
+ SandboxedInst.getOperand(2).getReg(), // IndexReg
+ SandboxedInst.getOperand(3).getImm(), // Offset
+ SandboxedInst.getOperand(4).getReg(), // SegmentReg
+ Out);
+ }
+
+ EmitRegFix(Reg64, Out);
+ Out.EmitBundleUnlock();
+}
+
+// Does the x86 platform specific work for setjmp.
+// It expects that a pointer to a JMP_BUF is in %ecx/%rdi, and that the
+// return address is in %edx/%rdx.
+// The JMP_BUF is a structure that has the maximum size over all supported
+// architectures. The callee-saves registers plus [er]ip and [er]sp are stored
+// into the JMP_BUF.
+static void EmitSetjmp(bool Is64Bit, MCStreamer &Out) {
+ unsigned JmpBuf = Is64Bit ? X86::RDI : X86::ECX;
+ unsigned RetAddr = Is64Bit ? X86::RDX : X86::EDX;
+ if (Is64Bit) {
+ unsigned BasePtr = X86::R15;
+ unsigned Segment = X86::PSEUDO_NACL_SEG;
+ // Save the registers.
+ EmitStore(true, BasePtr, 1, JmpBuf, 0, Segment, X86::RBX, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 8, Segment, X86::RBP, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 16, Segment, X86::RSP, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 24, Segment, X86::R12, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 32, Segment, X86::R13, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 40, Segment, X86::R14, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 48, Segment, X86::RDX, Out);
+ } else {
+ // Save the registers.
+ EmitStore(false, JmpBuf, 1, 0, 0, 0, X86::EBX, Out);
+ EmitStore(false, JmpBuf, 1, 0, 4, 0, X86::EBP, Out);
+ EmitStore(false, JmpBuf, 1, 0, 8, 0, X86::ESP, Out);
+ EmitStore(false, JmpBuf, 1, 0, 12, 0, X86::ESI, Out);
+ EmitStore(false, JmpBuf, 1, 0, 16, 0, X86::EDI, Out);
+ EmitStore(false, JmpBuf, 1, 0, 20, 0, X86::EDX, Out);
+ }
+ // Return 0.
+ EmitClearReg(false, X86::EAX, Out);
+}
+
+// Does the x86 platform specific work for longjmp other than normalizing the
+// return parameter (returns of zero are changed to return 1 in the caller).
+// It expects that a pointer to a JMP_BUF is in %ecx/%rdi, and that the return
+// value is in %eax.
+// The JMP_BUF is a structure that has the maximum size over all supported
+// architectures. The saved registers are restored from the JMP_BUF.
+static void EmitLongjmp(bool Is64Bit, MCStreamer &Out) {
+ unsigned JmpBuf = Is64Bit ? X86::RDI : X86::ECX;
+ // If the return value was 0, make it 1.
+ EmitAndRegReg(false, X86::EAX, X86::EAX, Out);
+ EmitMoveRegImm32(false, X86::EBX, 1, Out);
+ EmitCmove(false, X86::EAX, X86::EBX, Out);
+ if (Is64Bit) {
+ unsigned BasePtr = X86::R15;
+ unsigned Segment = X86::PSEUDO_NACL_SEG;
+ // Restore the registers.
+ EmitLoad(true, X86::RBX, BasePtr, 1, JmpBuf, 0, Segment, Out);
+ EmitLoad(true, X86::RDX, BasePtr, 1, JmpBuf, 8, Segment, Out);
+ // restbp
+ Out.EmitBundleLock();
+ EmitRegTruncate(X86::RBP, Out);
+ EmitRegFix(X86::RBP, Out);
+ Out.EmitBundleUnlock();
+ EmitLoad(true, X86::RDX, BasePtr, 1, JmpBuf, 16, Segment, Out);
+ // restsp
+ Out.EmitBundleLock();
+ EmitRegTruncate(X86::RSP, Out);
+ EmitRegFix(X86::RSP, Out);
+ Out.EmitBundleUnlock();
+ EmitLoad(true, X86::R12, BasePtr, 1, JmpBuf, 24, Segment, Out);
+ EmitLoad(true, X86::R13, BasePtr, 1, JmpBuf, 32, Segment, Out);
+ EmitLoad(true, X86::R14, BasePtr, 1, JmpBuf, 40, Segment, Out);
+ EmitLoad(true, X86::RDX, BasePtr, 1, JmpBuf, 48, Segment, Out);
+ } else {
+ // Restore the registers.
+ EmitLoad(false, X86::EBX, JmpBuf, 1, 0, 0, 0, Out);
+ EmitLoad(false, X86::EBP, JmpBuf, 1, 0, 4, 0, Out);
+ EmitLoad(false, X86::ESP, JmpBuf, 1, 0, 8, 0, Out);
+ EmitLoad(false, X86::ESI, JmpBuf, 1, 0, 12, 0, Out);
+ EmitLoad(false, X86::EDI, JmpBuf, 1, 0, 16, 0, Out);
+ EmitLoad(false, X86::ECX, JmpBuf, 1, 0, 20, 0, Out);
+ }
+ // Jmp to the saved return address.
+ MCInst JMPInst;
+ JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r);
+ JMPInst.addOperand(MCOperand::CreateReg(X86::ECX));
+ Out.EmitInstruction(JMPInst);
+}
+
+namespace llvm {
+// CustomExpandInstNaClX86 -
+// If Inst is a NaCl pseudo instruction, emits the substitute
+// expansion to the MCStreamer and returns true.
+// Otherwise, returns false.
+//
+// NOTE: Each time this function calls Out.EmitInstruction(), it will be
+// called again recursively to rewrite the new instruction being emitted.
+// Care must be taken to ensure that this does not result in an infinite
+// loop. Also, global state must be managed carefully so that it is
+// consistent during recursive calls.
+//
+// We need global state to keep track of the explicit prefix (PREFIX_*)
+// instructions. Unfortunately, the assembly parser prefers to generate
+// these instead of combined instructions. At this time, having only
+// one explicit prefix is supported.
+bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) {
+ // If we are emitting to .s, just emit all pseudo-instructions directly.
+ if (Out.hasRawTextSupport()) {
+ return false;
+ }
+ unsigned Opc = Inst.getOpcode();
+ DEBUG(dbgs() << "CustomExpandInstNaClX86("; Inst.dump(); dbgs() << ")\n");
+ switch (Opc) {
+ case X86::LOCK_PREFIX:
+ case X86::REP_PREFIX:
+ case X86::REPNE_PREFIX:
+ case X86::REX64_PREFIX:
+ // Ugly hack because LLVM AsmParser is not smart enough to combine
+ // prefixes back into the instruction they modify.
+ if (PrefixPass) {
+ PrefixPass = false;
+ PrefixSaved = 0;
+ return false;
+ }
+ assert(PrefixSaved == 0);
+ PrefixSaved = Opc;
+ return true;
+ case X86::NACL_TRAP32:
+ assert(PrefixSaved == 0);
+ EmitTrap(false, Out);
+ return true;
+ case X86::NACL_TRAP64:
+ assert(PrefixSaved == 0);
+ EmitTrap(true, Out);
+ return true;
+ case X86::NACL_CALL32d:
+ assert(PrefixSaved == 0);
+ EmitDirectCall(Inst.getOperand(0), false, Out);
+ return true;
+ case X86::NACL_CALL64d:
+ assert(PrefixSaved == 0);
+ EmitDirectCall(Inst.getOperand(0), true, Out);
+ return true;
+ case X86::NACL_CALL32r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), false, true, Out);
+ return true;
+ case X86::NACL_CALL64r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), true, true, Out);
+ return true;
+ case X86::NACL_JMP32r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), false, false, Out);
+ return true;
+ case X86::NACL_TLS_addr32:
+ assert(PrefixSaved == 0);
+ EmitTLSAddr32(Inst, Out);
+ return true;
+ case X86::NACL_JMP64r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), true, false, Out);
+ return true;
+ case X86::NACL_RET32:
+ assert(PrefixSaved == 0);
+ EmitRet(NULL, false, Out);
+ return true;
+ case X86::NACL_RET64:
+ assert(PrefixSaved == 0);
+ EmitRet(NULL, true, Out);
+ return true;
+ case X86::NACL_RETI32:
+ assert(PrefixSaved == 0);
+ EmitRet(&Inst.getOperand(0), false, Out);
+ return true;
+ case X86::NACL_ASPi8:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::ADD32ri8, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_ASPi32:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::ADD32ri, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_SSPi8:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::SUB32ri8, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_SSPi32:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::SUB32ri, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_ANDSPi32:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::AND32ri, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_SPADJi32:
+ assert(PrefixSaved == 0);
+ EmitSPAdj(Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_RESTBPm:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::EBP, true, Out);
+ return true;
+ case X86::NACL_RESTBPr:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::EBP, false, Out);
+ return true;
+ case X86::NACL_RESTSPm:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::ESP, true, Out);
+ return true;
+ case X86::NACL_RESTSPr:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::ESP, false, Out);
+ return true;
+ // Intrinsics for eliminating platform specific .s code from the client
+ // side link. These are recognized in X86InstrNaCl.td.
+ case X86::NACL_SETJ32:
+ EmitSetjmp(false, Out);
+ return true;
+ case X86::NACL_SETJ64:
+ EmitSetjmp(true, Out);
+ return true;
+ case X86::NACL_LONGJ32:
+ EmitLongjmp(false, Out);
+ return true;
+ case X86::NACL_LONGJ64:
+ EmitLongjmp(true, Out);
+ return true;
+ }
+
+ unsigned IndexReg;
+ MCInst SandboxedInst = Inst;
+ if (SandboxMemoryRef(&SandboxedInst, &IndexReg, Out)) {
+ unsigned PrefixLocal = PrefixSaved;
+ PrefixSaved = 0;
+
+ Out.EmitBundleLock();
+ EmitRegTruncate(IndexReg, Out);
+ if (PrefixLocal)
+ EmitPrefix(PrefixLocal, Out);
+ Out.EmitInstruction(SandboxedInst);
+ Out.EmitBundleUnlock();
+ return true;
+ }
+
+ if (PrefixSaved) {
+ unsigned PrefixLocal = PrefixSaved;
+ PrefixSaved = 0;
+ EmitPrefix(PrefixLocal, Out);
+ }
+ return false;
+}
+
+} // namespace llvm
+
+
+
+
+// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+//
+// This is an exact copy of getX86SubSuperRegister from X86RegisterInfo.h
+// We cannot use the original because it is part of libLLVMX86CodeGen,
+// which cannot be a dependency of this module (libLLVMX86Desc).
+//
+// However, in all likelihood, the real getX86SubSuperRegister will
+// eventually be moved to MCTargetDesc, and then this copy can be
+// removed.
+
+namespace llvm {
+unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.h b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h
new file mode 100644
index 0000000000..01b400d4d9
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h
@@ -0,0 +1,19 @@
+//===-- X86MCNaCl.h - Prototype for CustomExpandInstNaClX86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCNACL_H
+#define X86MCNACL_H
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out);
+}
+
+#endif
diff --git a/lib/Target/X86/X86InstrNaCl.td b/lib/Target/X86/X86InstrNaCl.td
new file mode 100644
index 0000000000..ecaabc643b
--- /dev/null
+++ b/lib/Target/X86/X86InstrNaCl.td
@@ -0,0 +1,433 @@
+//====- X86InstrNaCl.td - Describe NaCl Instructions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the modifications to the X86 instruction set needed for
+// Native Client code generation.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NaCl specific DAG Nodes.
+//
+
+//===----------------------------------------------------------------------===//
+//
+// Native Client Pseudo-Instructions
+//
+// These instructions implement the Native Client pseudo-instructions, such
+// as nacljmp and naclasp.
+//
+// TableGen and MC consider these to be "real" instructions. They can be
+// parsed by the AsmParser and emitted by the AsmStreamer as if they
+// were just regular instructions. They are not marked "Pseudo" because
+// this would imply isCodeGenOnly=1, which would stop them from being
+// parsed by the assembler.
+//
+// These instructions cannot be encoded (written into an object file) by the
+// MCCodeEmitter. Instead, during direct object emission, they get lowered to
+// a sequence of streamer emits. (see X86InstrNaCl.cpp)
+//
+// These instructions should not be used in CodeGen. They have no pattern
+// and lack CodeGen metadata. Instead, the X86NaClRewritePass should
+// generate these instructions after CodeGen is finished.
+//
+//===----------------------------------------------------------------------===//
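+
+// For example (an illustrative sketch, assuming the default 32-byte bundles
+// and the corresponding $-32 jump mask), the pseudo-instruction
+//   naclcall %ecx
+// is emitted as a bundle-locked group whose call ends at a bundle boundary:
+//   .bundle_align_end
+//   .bundle_lock
+//   andl $-32, %ecx
+//   call *%ecx
+//   .bundle_unlock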
+
+
+//===----------------------------------------------------------------------===//
+// 32-bit Native Client Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class NaClPI32<dag outs, dag ins, string asm>
+ : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In32BitMode]>;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in {
+ def NACL_TRAP32 : NaClPI32<(outs), (ins), "nacltrap">;
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in {
+ def NACL_RET32 : NaClPI32<(outs), (ins), "naclret">;
+ def NACL_RETI32 : NaClPI32<(outs), (ins i16imm:$amt), "naclreti\t$amt">;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ isAsmParserOnly = 1 in {
+ def NACL_JMP32r : NaClPI32<(outs), (ins GR32:$dst), "nacljmp\t$dst">;
+}
+
+let isCall = 1, isAsmParserOnly = 1 in {
+ def NACL_CALL32d : NaClPI32<(outs), (ins i32imm_pcrel:$dst),
+ "naclcall\t$dst">;
+ def NACL_CALL32r : NaClPI32<(outs), (ins GR32:$dst),
+ "naclcall\t$dst">;
+}
+
+// nacltlsaddr32 gets rewritten to:
+// .bundle_align_end
+// .bundle_lock
+// leal\t$sym@TLSGD, %eax
+// call\t___tls_get_addr@PLT
+// .bundle_unlock
+// (The linker expects the leal+call sequence to be directly adjacent)
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP],
+ isAsmParserOnly = 1 in
+def NACL_TLS_addr32 : NaClPI32<(outs), (ins i32mem:$sym),
+ "nacltlsaddr32\t$sym">;
+
+//===----------------------------------------------------------------------===//
+// 64-bit Native Client Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class NaClPI64<dag outs, dag ins, string asm>
+ : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In64BitMode]>;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in {
+ def NACL_TRAP64 : NaClPI64<(outs), (ins), "nacltrap">;
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in {
+ def NACL_RET64 : NaClPI64<(outs), (ins), "naclret">;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ isAsmParserOnly = 1 in {
+ def NACL_JMP64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP),
+ "nacljmp\t{$dst, $rZP|$rZP, $dst}">;
+}
+
+
+let isCall = 1, isAsmParserOnly = 1 in {
+ def NACL_CALL64d : NaClPI64<(outs), (ins i32imm_pcrel:$dst),
+ "naclcall\t$dst">;
+ def NACL_CALL64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP),
+ "naclcall\t$dst,$rZP">;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], isAsmParserOnly = 1 in {
+ def NACL_ASPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP),
+ "naclasp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_ASPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclasp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_SSPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP),
+ "naclssp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_SSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclssp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_ANDSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclandsp{q}\t{$off, $rZP|$rZP, $off}">;
+}
+
+let Defs = [RSP], Uses = [RBP], isAsmParserOnly = 1 in {
+ def NACL_SPADJi32 : NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclspadj\t{$off, $rZP|$rZP, $off}">;
+}
+
+let Defs = [RSP], isAsmParserOnly = 1 in {
+ def NACL_RESTSPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP),
+ "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">;
+ def NACL_RESTSPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP),
+ "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">;
+}
+
+def : MnemonicAlias<"naclrestsp", "naclrestsp_noflags">;
+
+let Defs = [RBP], isAsmParserOnly = 1 in {
+ def NACL_RESTBPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP),
+ "naclrestbp\t{$src, $rZP|$rZP, $src}">;
+ def NACL_RESTBPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP),
+ "naclrestbp\t{$src, $rZP|$rZP, $src}">;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Code Generator Instructions (isCodeGenOnly == 1)
+//
+// These instructions exist to make CodeGen work with Native Client's
+// modifications.
+//
+// Many of these instructions exist because of limitations in CodeGen
+// or TableGen, and may become unnecessary in the future.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+//
+// CodeGen 32-bit
+//
+//===----------------------------------------------------------------------===//
+
+
+// To avoid a naming conflict between call/naclcall, we have to
+// disable the real CALLpcrel32 and CALL32r instructions when targeting
+// NaCl. Thus, equivalent instructions need to be defined here.
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+
+ def NACL_CG_CALLpcrel32 : I<0, Pseudo,
+ (outs), (ins i32imm_pcrel:$dst, variable_ops),
+ "naclcall\t$dst", []>,
+ Requires<[IsNaCl, In32BitMode]>;
+ def NACL_CG_CALL32r : I<0, Pseudo,
+ (outs), (ins GR32:$dst, variable_ops),
+ "naclcall\t$dst", [(X86call GR32:$dst)]>,
+ Requires<[IsNaCl, In32BitMode]>;
+}
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (NACL_CG_CALLpcrel32 tglobaladdr:$dst)>,
+ Requires<[IsNaCl, In32BitMode]>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (NACL_CG_CALLpcrel32 texternalsym:$dst)>,
+ Requires<[IsNaCl, In32BitMode]>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (NACL_CG_CALLpcrel32 imm:$dst)>,
+ Requires<[IsNaCl, In32BitMode, CallImmAddr]>;
+
+//===----------------------------------------------------------------------===//
+//
+// CodeGen 64-bit
+//
+//===----------------------------------------------------------------------===//
+
+
+// Because pointers are 32-bit on X86-64 Native Client, we need to
+// produce new versions of the JMP64/CALL64 instructions that accept
+// addresses that are i32 instead of i64.
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def NACL_CG_JMP64r : I<0, Pseudo, (outs), (ins GR32:$dst, variable_ops),
+ "nacljmp\t$dst",
+ [(brind GR32:$dst)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ def NACL_CG_CALL64pcrel32 : I<0, Pseudo, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "naclcall\t$dst", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+ def NACL_CG_CALL64r : I<0, Pseudo, (outs), (ins GR32:$dst, variable_ops),
+ "naclcall\t$dst,%r15",
+ [(X86call GR32:$dst)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (NACL_CG_CALL64pcrel32 tglobaladdr:$dst)>,
+ Requires<[IsNaCl, In64BitMode]>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (NACL_CG_CALL64pcrel32 texternalsym:$dst)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+// Tail calls
+// Also needed due to the i64 / i32 pointer problem.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ def NACL_CG_TCRETURNdi64 : I<0, Pseudo, (outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+ def NACL_CG_TCRETURNri64 : I<0, Pseudo, (outs),
+ (ins GR32_TC_64:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+ def NACL_CG_TAILJMPd64 : I<0, Pseudo, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+ def NACL_CG_TAILJMPr64 : I<0, Pseudo, (outs),
+ (ins GR32_TC_64:$dst, variable_ops),
+ "nacljmp\t$dst,%r15 # TAILCALL", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (NACL_CG_TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (NACL_CG_TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+def : Pat<(X86tcret GR32_TC_64:$dst, imm:$off),
+ (NACL_CG_TCRETURNri64 GR32_TC_64:$dst, imm:$off)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+// ELF TLS Support
+
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def NACL_CG_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ ".bundle_align_end"
+ ".bundle_lock"
+ "leal\t$sym, %eax; "
+ "call\t___tls_get_addr@PLT"
+ ".bundle_unlock",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode, IsNaCl]>;
+
+// These are lowered in X86NaClRewritePass.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+def NACL_CG_GD_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+def NACL_CG_LE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_le tls32addr:$sym)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+def NACL_CG_IE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_ie tls32addr:$sym)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+// For mtls-use-call.
+def NACL_CG_LE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_le tls32addr:$sym)]>,
+ Requires<[IsNaCl, In32BitMode]>;
+def NACL_CG_IE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_ie tls32addr:$sym)]>,
+ Requires<[IsNaCl, In32BitMode]>;
+}
+
+let usesCustomInserter = 1, Defs = [EFLAGS] in
+def NACL_CG_VAARG_64 : I<0, Pseudo,
+ (outs GR32:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#NACL_VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR32:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// NativeClient intrinsics
+// These provide the ability to implement several low-level features without
+// having to link native ASM code on the client.
+// These need to be kept in sync with lib/Target/ARM/ARMInstrInfo.td and
+// lib/Target/X86/X86InstrNaCl.cpp.
+// TODO(sehr): Separate this code to allow NaCl and non-NaCl versions.
+
+// Saves all the callee-saved registers, [er]sp, and [er]ip to the JMP_BUF
+// structure pointed to by 4(%esp) or %rdi. The JMP_BUF structure is sized to
+// the maximum over all supported architectures. The MC expansions happen
+// in X86InstrNaCl.cpp.
+let Uses = [ECX, RDX], Defs = [EAX, EFLAGS] in {
+ def NACL_SETJ32 : I<0, Pseudo, (outs), (ins),
+ "movl %ebx, 0(%ecx); "
+ "movl %ebp, 4(%ecx); "
+ "movl %esp, 8(%ecx); "
+ "movl %esi, 12(%ecx); "
+ "movl %edi, 16(%ecx); "
+ "movl %edx, 20(%ecx); "
+ "xorl %eax, %eax; ",
+ [(set EAX, (int_nacl_setjmp ECX, EDX))]>,
+ Requires<[IsNaCl, In32BitMode]>;
+}
+let Uses = [EDI, RDX], Defs = [EAX, EFLAGS] in {
+ def NACL_SETJ64 : I<0, Pseudo, (outs), (ins),
+ "movq %rbx, %nacl:0(%r15, %rdi); "
+ "movq %rbp, %nacl:8(%r15, %rdi); "
+ "movq %rsp, %nacl:16(%r15, %rdi); "
+ "movq %r12, %nacl:24(%r15, %rdi); "
+ "movq %r13, %nacl:32(%r15, %rdi); "
+ "movq %r14, %nacl:40(%r15, %rdi); "
+ "movq %rdx, %nacl:48(%r15, %rdi); "
+ "xorl %eax, %eax; ",
+ [(set EAX, (int_nacl_setjmp EDI, EDX))]>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+// Restores all the callee-saved registers, [er]sp, and [er]ip from the JMP_BUF
+// structure pointed to by 4(%esp) or %rdi. Returns the value in 8(%esp) or
+// %rsi at entry. This implements the tail of longjmp, with the normalization
+// of the return value (if the caller passes zero to longjmp, it should return
+// 1) done in the caller. The MC expansions happen in X86InstrNaCl.cpp.
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ Uses = [EAX, ECX] in {
+ def NACL_LONGJ32 : I<0, Pseudo, (outs), (ins),
+ "movl $$1, %ebx; "
+ "andl %eax, %eax; "
+ "cmovzl %ebx, %eax; "
+ "movl 0(%ecx), %ebx; "
+ "movl 4(%ecx), %ebp; "
+ "movl 8(%ecx), %esp; "
+ "movl 12(%ecx), %esi; "
+ "movl 16(%ecx), %edi; "
+ "movl 20(%ecx), %ecx; "
+ "nacljmp %ecx; ",
+ [(int_nacl_longjmp ECX, EAX)]>,
+ Requires<[IsNaCl, In32BitMode]>, TB;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ Uses = [EAX, EDI, R15] in {
+ def NACL_LONGJ64 : I<0, Pseudo, (outs), (ins),
+ "movl $$1, %ebx; "
+ "andl %eax, %eax; "
+ "cmovzl %ebx, %eax; "
+ "movq %nacl:0(%r15, %edi), %rbx; "
+ "movq %nacl:8(%r15, %edi), %rdx; "
+ "naclrestbp %edx, %r15; "
+ "movq %nacl:16(%r15, %edi), %rdx; "
+ "naclrestsp %edx, %r15; "
+ "movq %nacl:24(%r15, %edi), %r12; "
+ "movq %nacl:32(%r15, %edi), %r13; "
+ "movq %nacl:40(%r15, %edi), %r14; "
+ "movq %nacl:48(%r15, %edi), %rcx; "
+ "nacljmp %ecx, %r15; ",
+ [(int_nacl_longjmp EDI, EAX)]>,
+ Requires<[IsNaCl, In64BitMode]>, TB;
+}
diff --git a/lib/Target/X86/X86NaClJITInfo.cpp b/lib/Target/X86/X86NaClJITInfo.cpp
new file mode 100644
index 0000000000..e5ccbf960d
--- /dev/null
+++ b/lib/Target/X86/X86NaClJITInfo.cpp
@@ -0,0 +1,393 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target on Native Client
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86NaClJITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include <cstdlib>
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Valgrind.h"
+#ifdef __native_client__
+#include <nacl/nacl_dyncode.h>
+#endif
+
+using namespace llvm;
+
+extern cl::opt<int> FlagSfiX86JmpMask;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#elif defined(__pnacl__)
+#warning "PNaCl does not yet have JIT support"
+#else
+#error "Should not be building X86NaClJITInfo on non-x86"
+// TODO(dschuff): make this work under pnacl self-build?
+#endif
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+
+void X86NaClJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ // We don't know the original instruction boundaries, so we replace the
+ // whole bundle.
+ uint8_t buf[kBundleSize];
+ buf[0] = 0xE9; // Emit JMP opcode.
+  intptr_t OldAddr = (intptr_t)Old + 1; // Address of the JMP's rel32 operand
+  uint32_t NewOffset = (intptr_t)New - OldAddr - 4; // PC-relative offset of New
+ *((uint32_t*)(buf + 1)) = NewOffset;
+ memcpy(buf + 5, getNopSequence(kBundleSize - 5), kBundleSize - 5);
+
+#ifdef __native_client__
+ if(nacl_dyncode_create(Old, buf, kBundleSize)) {
+ report_fatal_error("machine code replacement failed");
+ }
+#endif
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
+ sys::ValgrindDiscardTranslations(Old, 5);
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+extern "C" {
+#if defined(X86_64_JIT) || defined(__pnacl__) || !defined(__native_client__)
+void X86NaClCompilationCallback(void) {
+//TODO(dschuff): implement for X86-64
+}
+void X86NaClCompilationCallback_fastcc(void) {
+//TODO(dschuff): implement for X86-64
+}
+#else
+// Chrome's system requirements include a Pentium III, so SSE is present.
+// For now this is the same as X86CompilationCallback_SSE.
+// In the future we could emit this rather than defining it with asm, for
+// compatibility with a PNaCl self-build.
+// We also omit the CFI directives (which are #defined away).
+
+// The difference between the two wrapper variants is that the first returns
+// through ecx and the second returns through eax. The fastcc calling
+// convention uses ecx to pass arguments, and the C calling convention uses
+// eax to pass arguments with the 'inreg' attribute, so we make sure not to
+// clobber the argument register. Returning through eax for fastcc and ecx
+// for C clobbers the 'nest' parameter, breaking nested functions (which are
+// not supported by clang in any case).
+
+void X86NaClCompilationCallback(void);
+asm(
+ ".text\n"
+ ".align 32\n"
+ ".globl " ASMPREFIX "X86NaClCompilationCallback\n"
+ TYPE_FUNCTION(X86NaClCompilationCallback)
+ ASMPREFIX "X86NaClCompilationCallback:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86NaClCompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "popl %ecx\n"
+ "nacljmp %ecx\n"
+ SIZE(X86NaClCompilationCallback)
+);
+
+
+
+void X86NaClCompilationCallback_fastcc(void);
+asm(
+ ".text\n"
+ ".align 32\n"
+ ".globl " ASMPREFIX "X86NaClCompilationCallback_fastcc\n"
+ TYPE_FUNCTION(X86NaClCompilationCallback_fastcc)
+ ASMPREFIX "X86NaClCompilationCallback_fastcc:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86NaClCompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "popl %eax\n"
+ "nacljmp %eax\n"
+ SIZE(X86NaClCompilationCallback_fastcc)
+);
+#endif
+
+/// X86NaClCompilationCallback2 - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call. This
+/// function must locate the start of the stub or call site and pass it into
+/// the JIT compiler function.
+
+// A stub has the following format:
+// | Jump opcode (1 byte) | Jump target +22 bytes | 3 bytes of NOPs
+// | 18 bytes of NOPs | 1 halt | Call opcode (1 byte) | call target
+// The jump targets the call at the end of the bundle, which targets the
+// compilation callback. Once the compilation callback has JITed the target
+// function, it replaces the first 8 bytes of the stub in a single atomic
+// operation, retargeting the jump at the newly JITed function.
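+//
+// As an illustrative sketch (assuming the 32-byte bundle size used here),
+// the stub emitted by emitFunctionStub below is laid out as:
+//   offset  0: E9 16 00 00 00    jmp +22 (to the call at offset 27)
+//   offset  5: 90 ...            21 bytes of NOP padding (3 + 18)
+//   offset 26: F4                hlt (marks this bundle as a stub)
+//   offset 27: E8 <rel32>        call to the compilation callback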
+
+static uint8_t *BundleRewriteBuffer;
+
+static void LLVM_ATTRIBUTE_USED
+X86NaClCompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ // Get the return address from where the call instruction left it
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+  // TODO: Take a lock here. Figure out whether it has to be the JIT lock or
+  // can be our own lock (or however we handle thread safety).
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr << "\n");
+#endif
+
+ intptr_t StubStart = RetAddr - 32;
+  // This probably isn't necessary. I believe the corresponding code in
+  // X86JITInfo is vestigial, and AFAICT no non-stub calls to the compilation
+  // callback are generated anywhere. Still, it doesn't hurt as a sanity check.
+ bool isStub = *((unsigned char*)StubStart) == 0xE9 &&
+ *((int32_t*)(StubStart + 1)) == 22 &&
+ *((unsigned char*)(StubStart + 26)) == 0xF4;
+
+ assert(isStub && "NaCl doesn't support rewriting non-stub callsites yet");
+
+ // Backtrack so RetAddr points inside the stub (so JITResolver can find
+ // which function to compile)
+ RetAddr -= 4;
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the stub's call target, so that we don't end up here every time we
+ // execute the call.
+
+ // Get the first 8 bytes of the stub
+ memcpy(BundleRewriteBuffer, (void *)(StubStart), 8);
+ // Point the jump at the newly-JITed code
+ *((intptr_t *)(BundleRewriteBuffer + 1)) = NewVal - (StubStart + 5);
+
+ // Copy the new code
+#ifdef __native_client__
+ if(nacl_dyncode_modify((void *)StubStart, BundleRewriteBuffer, 8)) {
+ report_fatal_error("dyncode_modify failed");
+ }
+#endif
+ // TODO: release the lock
+
+ // Change our return address to execute the new jump
+ *RetAddrLoc = StubStart;
+}
+
+}
+
+const int X86NaClJITInfo::kBundleSize;
+
+TargetJITInfo::LazyResolverFn
+X86NaClJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return X86NaClCompilationCallback;
+}
+
+X86NaClJITInfo::X86NaClJITInfo(X86TargetMachine &tm) : X86JITInfo(tm) {
+ // FIXME: does LLVM have some way of doing static initialization?
+#ifndef __pnacl__
+ if(posix_memalign((void **)&BundleRewriteBuffer, kBundleSize, kBundleSize))
+ report_fatal_error("Could not allocate aligned memory");
+#else
+ BundleRewriteBuffer = NULL;
+#endif
+
+ NopString = new uint8_t[kBundleSize];
+ for (int i = 0; i < kBundleSize; i++) NopString[i] = 0x90;
+ X86Hlt.ins = new uint8_t[1];
+ X86Hlt.ins[0] = 0xf4;
+ X86Hlt.len = 1;
+}
+
+X86NaClJITInfo::~X86NaClJITInfo() {
+ delete [] NopString;
+ delete [] X86Hlt.ins;
+}
+
+TargetJITInfo::StubLayout X86NaClJITInfo::getStubLayout() {
+ // NaCl stubs must be full bundles because calls still have to be aligned
+ // even if they don't return
+ StubLayout Result = {kBundleSize, kBundleSize};
+ return Result;
+}
+
+
+void *X86NaClJITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ bool TargetsCC = Target == (void *)(intptr_t)X86NaClCompilationCallback;
+
+ // If we target the compilation callback, swap it for a different one for
+ // functions using the fastcc calling convention
+ if(TargetsCC && F->getCallingConv() == CallingConv::Fast) {
+ Target = (void *)(intptr_t)X86NaClCompilationCallback_fastcc;
+ }
+
+ void *Result = (void *)JCE.getCurrentPCValue();
+ assert(RoundUpToAlignment((uintptr_t)Result, kBundleSize) == (uintptr_t)Result
+ && "Unaligned function stub");
+ if (!TargetsCC) {
+ // Jump to the target
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+ // Fill with Nops.
+ emitNopPadding(JCE, 27);
+ } else {
+ // Jump over 22 bytes
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE(22);
+    // Emit 3 bytes of NOPs to ensure an instruction boundary at 8 bytes.
+    emitNopPadding(JCE, 3);
+    // Emit 18 more bytes of NOPs.
+    emitNopPadding(JCE, 18);
+    // Emit 1 byte of halt. This lets the compilation callback tell whether
+    // we came from a stub or not.
+    JCE.emitByte(X86Hlt.ins[0]);
+    // Emit a call to the compilation callback.
+ JCE.emitByte(0xE8);
+ JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+ }
+ return Result;
+}
+
+// Relocations are the same as on X86, but the address being written is not
+// the same as the address that the offset is relative to (see the comment on
+// setRelocationBuffer in X86NaClJITInfo.h).
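+// For example, a reloc_pcrel_word at machine-code offset O is patched at
+// RelocationBuffer + O, but its displacement is computed relative to
+// Function + O, where the code will actually execute.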
+void X86NaClJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = RelocationBuffer + MR->getMachineCodeOffset();
+ void *RelocTargetPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocTargetPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+const uint8_t *X86NaClJITInfo::getNopSequence(size_t len) const {
+ // TODO(dschuff): use more efficient NOPs.
+ // Update emitNopPadding when it happens
+ assert((int)len <= kBundleSize &&
+ "Nop sequence can't be more than bundle size");
+ return NopString;
+}
+
+void X86NaClJITInfo::emitNopPadding(JITCodeEmitter &JCE, size_t len) {
+ for (size_t i = 0; i < len; i++) JCE.emitByte(NopString[i]);
+}
+
+const TargetJITInfo::HaltInstruction *X86NaClJITInfo::getHalt() const {
+ return &X86Hlt;
+}
+
+int X86NaClJITInfo::getBundleSize() const {
+ return kBundleSize;
+}
+
+int32_t X86NaClJITInfo::getJumpMask() const {
+ return FlagSfiX86JmpMask;
+}
diff --git a/lib/Target/X86/X86NaClJITInfo.h b/lib/Target/X86/X86NaClJITInfo.h
new file mode 100644
index 0000000000..9416efeff1
--- /dev/null
+++ b/lib/Target/X86/X86NaClJITInfo.h
@@ -0,0 +1,75 @@
+//=- X86NaClJITInfo.h - X86 implementation of the JIT interface --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class for
+// Native Client
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86NACLJITINFO_H
+#define X86NACLJITINFO_H
+
+#include "X86JITInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86NaClJITInfo : public X86JITInfo {
+ void emitNopPadding(JITCodeEmitter &JCE, size_t len);
+ const X86Subtarget *Subtarget;
+ uintptr_t PICBase;
+ uint8_t *NopString;
+ HaltInstruction X86Hlt;
+ uint8_t *RelocationBuffer;
+ public:
+ static const int kBundleSize = 32;
+ explicit X86NaClJITInfo(X86TargetMachine &tm);
+ virtual ~X86NaClJITInfo();
+
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on X86 NaCl.
+ virtual StubLayout getStubLayout();
+
+    // Note: the emission functions MUST NOT touch the target memory.
+ virtual void *emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE);
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ virtual char* allocateThreadLocalMemory(size_t size) {
+ //TODO(dschuff) Implement TLS or decide whether X86 TLS works
+ assert(0 && "This target does not implement thread local storage!");
+ return 0;
+ }
+ /// Return a string containing a sequence of NOPs which is valid for
+ /// the given length
+ virtual const uint8_t *getNopSequence(size_t len) const;
+ virtual const HaltInstruction *getHalt() const;
+ virtual int getBundleSize() const;
+ virtual int getJumpMask() const;
+ /// Relocations cannot happen in-place in NaCl because we can't write to
+ /// code. This function takes a pointer to where the code has been emitted,
+ /// before it is copied to the code region. The subsequent call to
+ /// relocate takes pointers to the target code location, but rewrites the
+ /// code in the relocation buffer rather than at the target
+ virtual void setRelocationBuffer(unsigned char * BufferBegin) {
+ RelocationBuffer = BufferBegin;
+ }
+ };
+}
+
+#endif
diff --git a/lib/Target/X86/X86NaClRewriteFinalPass.cpp b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
new file mode 100644
index 0000000000..93728ddb08
--- /dev/null
+++ b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
@@ -0,0 +1,236 @@
+//=== X86NaClRewriteFinalPass.cpp - Expand NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass expands NaCl pseudo-instructions into real instructions.
+// This duplicates much of the functionality found in X86MCNaCl.cpp but is
+// needed for non-MC JIT, which doesn't use MC. It expands pseudo instructions
+// into bundle-locked groups by emitting a BUNDLE_LOCK marker,
+// followed by the instructions, followed by a BUNDLE_UNLOCK marker.
+// The code emitter needs to ensure the alignment as it emits. Additionally,
+// this pass needs to be run last, or the user at least needs to ensure that
+// subsequent passes do not reorder or remove any bundled groups.
+//===----------------------------------------------------------------------===//
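+//
+// As an illustrative sketch, an indirect call pseudo-instruction such as
+// NACL_CALL32r %ecx is expanded by this pass into (with the jump mask taken
+// from FlagSfiX86JmpMask, $-32 for 32-byte bundles):
+//   BUNDLE_ALIGN_END
+//   BUNDLE_LOCK
+//   AND32ri8 %ecx, %ecx, $-32
+//   CALL32r %ecx
+//   BUNDLE_UNLOCK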
+#define DEBUG_TYPE "x86-jit-sandboxing"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Function.h"
+
+using namespace llvm;
+
+extern cl::opt<int> FlagSfiX86JmpMask;
+
+namespace {
+ class X86NaClRewriteFinalPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ X86NaClRewriteFinalPass() : MachineFunctionPass(ID),
+ kJumpMask(FlagSfiX86JmpMask) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NaCl Pseudo-instruction expansion";
+ }
+
+ private:
+ const int kJumpMask;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ bool Is64Bit;
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ void TraceLog(const char *fun,
+ const MachineBasicBlock &MBB,
+ const MachineBasicBlock::iterator MBBI) const;
+
+ void RewriteIndirectJump(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit,
+ bool IsCall);
+ void RewriteDirectCall(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit);
+ bool ApplyCommonRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ };
+
+ char X86NaClRewriteFinalPass::ID = 0;
+}
+
+void X86NaClRewriteFinalPass::RewriteIndirectJump(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit,
+ bool IsCall) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "rewrite indirect jump " << MBB);
+
+ unsigned reg32 = MI.getOperand(0).getReg();
+ unsigned reg64 = getX86SubSuperRegister(reg32, MVT::i64);
+
+ if (IsCall)
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_ALIGN_END));
+
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_LOCK));
+
+ BuildMI(MBB, MBBI, DL, TII->get(X86::AND32ri8))
+ .addReg(reg32)
+ .addReg(reg32)
+    // .addOperand(MI.getOperand(0)) // correct flags, but might be a 64-bit reg
+ .addImm(kJumpMask);
+
+ if (Is64Bit) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64rr))
+ .addReg(reg64)
+ .addReg(reg64)
+ .addReg(X86::R15);
+ }
+
+ if (IsCall) {
+ BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::CALL64r : X86::CALL32r))
+ .addReg(Is64Bit ? reg64 : reg32);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::JMP64r : X86::JMP32r))
+ .addReg(Is64Bit ? reg64 : reg32);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_UNLOCK));
+ MI.eraseFromParent();
+
+ DEBUG(dbgs() << "done rewrite indirect jump " << MBB);
+}
+
+void X86NaClRewriteFinalPass::RewriteDirectCall(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ DEBUG(dbgs() << "rewrite direct call " << MBB);
+ const MachineOperand &MO = MI.getOperand(0);
+  // Rewrite calls to immediates as indirect calls.
+  if (MO.isImm()) {
+    DEBUG(dbgs() << " is immediate " << MO);
+    // First, rewrite as a move imm->reg + indirect call sequence.
+ BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32ri))
+ .addReg(X86::ECX)
+ .addOperand(MO);
+ BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::CALL64r : X86::CALL32r))
+ .addReg(X86::ECX);
+ // Then use RewriteIndirectJump to sandbox it
+ MachineBasicBlock::iterator I = MBBI;
+ --I; // I now points at the call instruction
+ MI.eraseFromParent();
+ return RewriteIndirectJump(MBB, I, Is64Bit, true);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_ALIGN_END));
+
+ BuildMI(MBB, MBBI, DL,
+ TII->get(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32))
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
+bool X86NaClRewriteFinalPass::ApplyCommonRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch(Opcode) {
+ case X86::NACL_CALL32d:
+ RewriteDirectCall(MBB, MBBI, false);
+ break;
+ case X86::NACL_CALL64d:
+ RewriteDirectCall(MBB, MBBI, true);
+ break;
+ case X86::NACL_CALL32r:
+ RewriteIndirectJump(MBB, MBBI, false, true);
+ return true;
+ case X86::NACL_CALL64r:
+ RewriteIndirectJump(MBB, MBBI, true, true);
+ return true;
+ case X86::NACL_JMP32r:
+ RewriteIndirectJump(MBB, MBBI, false, false);
+ return true;
+ case X86::NACL_JMP64r:
+ RewriteIndirectJump(MBB, MBBI, true, false);
+ return true;
+ case X86::NACL_TRAP32:
+ case X86::NACL_TRAP64:
+ case X86::NACL_ASPi8:
+ case X86::NACL_ASPi32:
+ case X86::NACL_SSPi8:
+ case X86::NACL_SSPi32:
+ case X86::NACL_SPADJi32:
+ case X86::NACL_RESTBPm:
+ case X86::NACL_RESTBPr:
+ case X86::NACL_RESTSPm:
+ case X86::NACL_RESTSPr:
+ case X86::NACL_SETJ32:
+ case X86::NACL_SETJ64:
+ case X86::NACL_LONGJ32:
+ case X86::NACL_LONGJ64:
+    dbgs() << "Unhandled NaCl pseudo-instruction: " << MI
+           << " (opcode " << Opcode << ")\n";
+ assert(false && "NaCl Pseudo-inst not handled");
+ case X86::NACL_RET32:
+ case X86::NACL_RET64:
+ case X86::NACL_RETI32:
+ assert(false && "Should not get RETs here");
+ }
+ return false;
+}
+
+bool X86NaClRewriteFinalPass::runOnMachineFunction(MachineFunction &MF) {
+ bool modified = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ const X86Subtarget *subtarget = &TM->getSubtarget<X86Subtarget>();
+ assert(subtarget->isTargetNaCl() && "Target in NaClRewriteFinal is not NaCl");
+
+ DEBUG(dbgs() << "*************** NaCl Rewrite Final ***************\n");
+ DEBUG(dbgs() << " funcnum " << MF.getFunctionNumber() << " "
+ << MF.getFunction()->getName() << "\n");
+
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E; ++MFI) {
+ modified |= runOnMachineBasicBlock(*MFI);
+ }
+
+ DEBUG(dbgs() << "************* NaCl Rewrite Final Done *************\n");
+ return modified;
+}
+
+bool X86NaClRewriteFinalPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI;
+ MBBI != MBB.end(); MBBI = NextMBBI) {
+ ++NextMBBI;
+ if (ApplyCommonRewrites(MBB, MBBI)) {
+ modified = true;
+ }
+ }
+ return modified;
+}
+
+// return an instance of the pass
+namespace llvm {
+ FunctionPass *createX86NaClRewriteFinalPass() {
+ return new X86NaClRewriteFinalPass();
+ }
+}
diff --git a/lib/Target/X86/X86NaClRewritePass.cpp b/lib/Target/X86/X86NaClRewritePass.cpp
new file mode 100644
index 0000000000..9b0922d2d0
--- /dev/null
+++ b/lib/Target/X86/X86NaClRewritePass.cpp
@@ -0,0 +1,869 @@
+//=== X86NaClRewritePass.cpp - Rewrite instructions for NaCl SFI --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that ensures that load/store addresses and
+// stack/frame pointer addresses are within the NaCl sandbox (for x86-64).
+// It also ensures that indirect control flow follows NaCl requirements.
+//
+// The other major portion of rewriting for NaCl is done in X86InstrNaCl.cpp,
+// which is responsible for expanding the NaCl-specific operations introduced
+// here and also the intrinsic functions to support setjmp, etc.
+//===----------------------------------------------------------------------===//
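+//
+// As an illustrative sketch of the memory sandboxing below: a 64-bit store
+// whose address uses a non-absolute pointer register (anything other than
+// %rsp, %rbp, %r15, or %rip), e.g.
+//   movl %eax, (,%rdi)
+// is rewritten to use the sandbox base %r15 and the %nacl: pseudo segment
+// (PSEUDO_NACL_SEG), which constrains the index to its low 32 bits:
+//   movl %eax, %nacl:(%r15,%rdi)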
+#define DEBUG_TYPE "x86-sandboxing"
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace {
+ class X86NaClRewritePass : public MachineFunctionPass {
+ public:
+ static char ID;
+ X86NaClRewritePass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NaCl Rewrites";
+ }
+
+ private:
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const X86Subtarget *Subtarget;
+ bool Is64Bit;
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ void TraceLog(const char *func,
+ const MachineBasicBlock &MBB,
+ const MachineBasicBlock::iterator MBBI) const;
+
+ bool ApplyRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ApplyStackSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ bool ApplyMemorySFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ bool ApplyFrameSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ bool ApplyControlSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ void PassLightWeightValidator(MachineBasicBlock &MBB);
+ bool AlignJumpTableTargets(MachineFunction &MF);
+ bool RewritePushfPopf(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator *Next);
+ };
+
+ char X86NaClRewritePass::ID = 0;
+
+}
+
+static void DumpInstructionVerbose(const MachineInstr &MI);
+
+static bool IsPushPop(MachineInstr &MI) {
+ const unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case X86::PUSH64r:
+ case X86::POP64r:
+ return true;
+ }
+}
+
+static bool IsSandboxed(MachineInstr &MI);
+
+static bool IsStore(MachineInstr &MI) {
+ return MI.getDesc().mayStore();
+}
+
+static bool IsLoad(MachineInstr &MI) {
+ return MI.getDesc().mayLoad();
+}
+
+static bool IsFrameChange(MachineInstr &MI) {
+ return MI.modifiesRegister(X86::EBP, NULL) ||
+ MI.modifiesRegister(X86::RBP, NULL);
+}
+
+static bool IsStackChange(MachineInstr &MI) {
+ return MI.modifiesRegister(X86::ESP, NULL) ||
+ MI.modifiesRegister(X86::RSP, NULL);
+}
+
+
+static bool HasControlFlow(const MachineInstr &MI) {
+ return MI.getDesc().isBranch() ||
+ MI.getDesc().isCall() ||
+ MI.getDesc().isReturn() ||
+ MI.getDesc().isTerminator() ||
+ MI.getDesc().isBarrier();
+}
+
+static bool IsDirectBranch(const MachineInstr &MI) {
+ return MI.getDesc().isBranch() &&
+ !MI.getDesc().isIndirectBranch();
+}
+
+static bool IsRegAbsolute(unsigned Reg) {
+ return (Reg == X86::RSP || Reg == X86::RBP ||
+ Reg == X86::R15 || Reg == X86::RIP);
+}
+
+static bool FindMemoryOperand(const MachineInstr &MI, unsigned* index) {
+ int NumFound = 0;
+ unsigned MemOp = 0;
+ for (unsigned i = 0; i < MI.getNumOperands(); ) {
+ if (isMem(&MI, i)) {
+ NumFound++;
+ MemOp = i;
+ i += X86::AddrNumOperands;
+ } else {
+ i++;
+ }
+ }
+
+  // Intrinsics and other functions can have mayLoad and mayStore set to
+  // reflect their side effects. Such instructions may have no explicit
+  // memory references for this function to find.
+ if (NumFound == 0)
+ return false;
+
+ if (NumFound > 1)
+ llvm_unreachable("Too many memory operands in instruction!");
+
+ *index = MemOp;
+ return true;
+}
+
+static unsigned PromoteRegTo64(unsigned RegIn) {
+ if (RegIn == 0)
+ return 0;
+ unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i64, false);
+ assert(RegOut != 0);
+ return RegOut;
+}
+
+static unsigned DemoteRegTo32(unsigned RegIn) {
+ if (RegIn == 0)
+ return 0;
+ unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i32, false);
+ assert(RegOut != 0);
+ return RegOut;
+}
+
+
+//
+// True if this MI restores RSP from RBP with a slight adjustment offset.
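+// (That is, it matches "lea <imm>(%rbp), %rsp"; ApplyStackSFI rewrites such
+// instructions to the NACL_SPADJi32 pseudo-instruction.)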
+//
+static bool MatchesSPAdj(const MachineInstr &MI) {
+  assert(MI.getOpcode() == X86::LEA64r && "Call to MatchesSPAdj w/ non-LEA");
+ const MachineOperand &DestReg = MI.getOperand(0);
+ const MachineOperand &BaseReg = MI.getOperand(1);
+ const MachineOperand &Scale = MI.getOperand(2);
+ const MachineOperand &IndexReg = MI.getOperand(3);
+ const MachineOperand &Offset = MI.getOperand(4);
+ return (DestReg.isReg() && DestReg.getReg() == X86::RSP &&
+ BaseReg.isReg() && BaseReg.getReg() == X86::RBP &&
+ Scale.getImm() == 1 &&
+ IndexReg.isReg() && IndexReg.getReg() == 0 &&
+ Offset.isImm());
+}
+
+void
+X86NaClRewritePass::TraceLog(const char *func,
+ const MachineBasicBlock &MBB,
+ const MachineBasicBlock::iterator MBBI) const {
+ DEBUG(dbgs() << "@" << func << "(" << MBB.getName() << ", " << (*MBBI) << ")\n");
+}
+
+bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyStackSFI", MBB, MBBI);
+ assert(Is64Bit);
+ MachineInstr &MI = *MBBI;
+
+ if (!IsStackChange(MI))
+ return false;
+
+ if (IsPushPop(MI))
+ return false;
+
+ if (MI.getDesc().isCall())
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(DestReg == X86::ESP || DestReg == X86::RSP);
+
+ unsigned NewOpc = 0;
+ switch (Opc) {
+ case X86::ADD64ri8 : NewOpc = X86::NACL_ASPi8; break;
+ case X86::ADD64ri32: NewOpc = X86::NACL_ASPi32; break;
+ case X86::SUB64ri8 : NewOpc = X86::NACL_SSPi8; break;
+ case X86::SUB64ri32: NewOpc = X86::NACL_SSPi32; break;
+ case X86::AND64ri32: NewOpc = X86::NACL_ANDSPi32; break;
+ }
+ if (NewOpc) {
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addImm(MI.getOperand(2).getImm())
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Promote "MOV ESP, EBP" to a 64-bit move
+ if (Opc == X86::MOV32rr && MI.getOperand(1).getReg() == X86::EBP) {
+ MI.getOperand(0).setReg(X86::RSP);
+ MI.getOperand(1).setReg(X86::RBP);
+ MI.setDesc(TII->get(X86::MOV64rr));
+ Opc = X86::MOV64rr;
+ }
+
+ // "MOV RBP, RSP" is already safe
+ if (Opc == X86::MOV64rr && MI.getOperand(1).getReg() == X86::RBP) {
+ return true;
+ }
+
+ // Promote 32-bit lea to 64-bit lea (does this ever happen?)
+ assert(Opc != X86::LEA32r && "Invalid opcode in 64-bit mode!");
+ if (Opc == X86::LEA64_32r) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned BaseReg = MI.getOperand(1).getReg();
+ unsigned Scale = MI.getOperand(2).getImm();
+ unsigned IndexReg = MI.getOperand(3).getReg();
+ assert(DestReg == X86::ESP);
+ assert(Scale == 1);
+ assert(BaseReg == X86::EBP);
+ assert(IndexReg == 0);
+ MI.getOperand(0).setReg(X86::RSP);
+ MI.getOperand(1).setReg(X86::RBP);
+ MI.setDesc(TII->get(X86::LEA64r));
+ Opc = X86::LEA64r;
+ }
+
+ if (Opc == X86::LEA64r && MatchesSPAdj(MI)) {
+ const MachineOperand &Offset = MI.getOperand(4);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_SPADJi32))
+ .addImm(Offset.getImm())
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPr))
+ .addReg(DemoteRegTo32(MI.getOperand(1).getReg()))
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::MOV32rm) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPm))
+ .addOperand(MI.getOperand(1)) // Base
+ .addOperand(MI.getOperand(2)) // Scale
+ .addOperand(MI.getOperand(3)) // Index
+ .addOperand(MI.getOperand(4)) // Offset
+ .addOperand(MI.getOperand(5)) // Segment
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ DumpInstructionVerbose(MI);
+ llvm_unreachable("Unhandled Stack SFI");
+}
+
+bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyFrameSFI", MBB, MBBI);
+ assert(Is64Bit);
+ MachineInstr &MI = *MBBI;
+
+ if (!IsFrameChange(MI))
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Handle moves to RBP
+ if (Opc == X86::MOV64rr) {
+ assert(MI.getOperand(0).getReg() == X86::RBP);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+
+ // MOV RBP, RSP is already safe
+ if (SrcReg == X86::RSP)
+ return false;
+
+ // Rewrite: mov %rbp, %rX
+ // To: naclrestbp %eX, %r15
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPr))
+ .addReg(DemoteRegTo32(SrcReg))
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Handle memory moves to RBP
+ if (Opc == X86::MOV64rm) {
+ assert(MI.getOperand(0).getReg() == X86::RBP);
+
+ // Rewrite: mov %rbp, (...)
+ // To: naclrestbp (...), %r15
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm))
+ .addOperand(MI.getOperand(1)) // Base
+ .addOperand(MI.getOperand(2)) // Scale
+ .addOperand(MI.getOperand(3)) // Index
+ .addOperand(MI.getOperand(4)) // Offset
+ .addOperand(MI.getOperand(5)) // Segment
+ .addReg(X86::R15); // rZP
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Popping onto RBP
+ // Rewrite to:
+ // naclrestbp (%rsp), %r15
+ // naclasp $8, %r15
+ //
+ // TODO(pdox): Consider rewriting to this instead:
+ // .bundle_lock
+ // pop %rbp
+ // mov %ebp,%ebp
+ // add %r15, %rbp
+ // .bundle_unlock
+ if (Opc == X86::POP64r) {
+ assert(MI.getOperand(0).getReg() == X86::RBP);
+
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm))
+ .addReg(X86::RSP) // Base
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addImm(0) // Offset
+ .addReg(0) // Segment
+ .addReg(X86::R15); // rZP
+
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi8))
+ .addImm(8)
+ .addReg(X86::R15);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ DumpInstructionVerbose(MI);
+ llvm_unreachable("Unhandled Frame SFI");
+}
+
+bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyControlSFI", MBB, MBBI);
+ MachineInstr &MI = *MBBI;
+
+ if (!HasControlFlow(MI))
+ return false;
+
+ // Direct branches are OK
+ if (IsDirectBranch(MI))
+ return false;
+
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+
+ // Rewrite indirect jump/call instructions
+ unsigned NewOpc = 0;
+ switch (Opc) {
+ // 32-bit
+ case X86::JMP32r : NewOpc = X86::NACL_JMP32r; break;
+ case X86::TAILJMPr : NewOpc = X86::NACL_JMP32r; break;
+ case X86::NACL_CG_CALL32r : NewOpc = X86::NACL_CALL32r; break;
+ // 64-bit
+ case X86::NACL_CG_JMP64r : NewOpc = X86::NACL_JMP64r; break;
+ case X86::NACL_CG_CALL64r : NewOpc = X86::NACL_CALL64r; break;
+ case X86::NACL_CG_TAILJMPr64 : NewOpc = X86::NACL_JMP64r; break;
+ }
+ if (NewOpc) {
+ MachineInstrBuilder NewMI =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addOperand(MI.getOperand(0));
+ if (Is64Bit) {
+ NewMI.addReg(X86::R15);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+  // EH_RETURN has a single argument which is not actually used directly.
+  // The argument gives the location where to reposition the stack pointer
+  // before returning. EmitPrologue takes care of that repositioning.
+  // So EH_RETURN ultimately just emits a plain "ret".
+ if (Opc == X86::RET || Opc == X86::EH_RETURN || Opc == X86::EH_RETURN64) {
+ // To maintain compatibility with nacl-as, for now we don't emit naclret.
+ // MI.setDesc(TII->get(Is64Bit ? X86::NACL_RET64 : X86::NACL_RET32));
+ if (Is64Bit) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), X86::RCX);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP64r))
+ .addReg(X86::ECX)
+ .addReg(X86::R15);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r))
+ .addReg(X86::ECX);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::RETI) {
+ // To maintain compatibility with nacl-as, for now we don't emit naclret.
+ // MI.setDesc(TII->get(X86::NACL_RETI32));
+ assert(!Is64Bit);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::ADD32ri), X86::ESP)
+ .addReg(X86::ESP)
+ .addOperand(MI.getOperand(0));
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r))
+ .addReg(X86::ECX);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Rewrite trap
+ if (Opc == X86::TRAP) {
+ // To maintain compatibility with nacl-as, for now we don't emit nacltrap.
+ // MI.setDesc(TII->get(Is64Bit ? X86::NACL_TRAP64 : X86::NACL_TRAP32));
+ BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32mi))
+ .addReg(Is64Bit ? X86::R15 : 0) // Base
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addImm(0) // Offset
+ .addReg(0) // Segment
+ .addImm(0); // Value
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::NACL_LONGJ32 ||
+ Opc == X86::NACL_LONGJ64) {
+ // The expansions for these intrinsics already handle control SFI.
+ return false;
+ }
+
+ DumpInstructionVerbose(MI);
+ llvm_unreachable("Unhandled Control SFI");
+}
+
+//
+// Sandboxes loads and stores (64-bit only)
+//
+bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyMemorySFI", MBB, MBBI);
+ assert(Is64Bit);
+ MachineInstr &MI = *MBBI;
+
+ if (!IsLoad(MI) && !IsStore(MI))
+ return false;
+
+ if (IsPushPop(MI))
+ return false;
+
+ unsigned MemOp;
+ if (!FindMemoryOperand(MI, &MemOp))
+ return false;
+ assert(isMem(&MI, MemOp));
+ MachineOperand &BaseReg = MI.getOperand(MemOp + 0);
+ MachineOperand &Scale = MI.getOperand(MemOp + 1);
+ MachineOperand &IndexReg = MI.getOperand(MemOp + 2);
+ //MachineOperand &Disp = MI.getOperand(MemOp + 3);
+ MachineOperand &SegmentReg = MI.getOperand(MemOp + 4);
+
+ // Make sure the base and index are 64-bit registers.
+ IndexReg.setReg(PromoteRegTo64(IndexReg.getReg()));
+ BaseReg.setReg(PromoteRegTo64(BaseReg.getReg()));
+ assert(IndexReg.getSubReg() == 0);
+ assert(BaseReg.getSubReg() == 0);
+
+ bool AbsoluteBase = IsRegAbsolute(BaseReg.getReg());
+ bool AbsoluteIndex = IsRegAbsolute(IndexReg.getReg());
+ unsigned AddrReg = 0;
+
+ if (AbsoluteBase && AbsoluteIndex) {
+ llvm_unreachable("Unexpected absolute register pair");
+ } else if (AbsoluteBase) {
+ AddrReg = IndexReg.getReg();
+ } else if (AbsoluteIndex) {
+ assert(!BaseReg.getReg() && "Unexpected base register");
+ assert(Scale.getImm() == 1);
+ AddrReg = 0;
+ } else {
+ assert(!BaseReg.getReg() && "Unexpected relative register pair");
+ BaseReg.setReg(X86::R15);
+ AddrReg = IndexReg.getReg();
+ }
+
+ if (AddrReg) {
+ assert(!SegmentReg.getReg() && "Unexpected segment register");
+ SegmentReg.setReg(X86::PSEUDO_NACL_SEG);
+ return true;
+ }
+
+ return false;
+}
+
+bool X86NaClRewritePass::ApplyRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+
+ // These direct jumps need their opcode rewritten
+ // and variable operands removed.
+ unsigned NewOpc = 0;
+ switch (Opc) {
+ case X86::NACL_CG_CALLpcrel32 : NewOpc = X86::NACL_CALL32d; break;
+ case X86::TAILJMPd : NewOpc = X86::JMP_4; break;
+ case X86::NACL_CG_TAILJMPd64 : NewOpc = X86::JMP_4; break;
+ case X86::NACL_CG_CALL64pcrel32: NewOpc = X86::NACL_CALL64d; break;
+ }
+ if (NewOpc) {
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addOperand(MI.getOperand(0));
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::NACL_CG_TLS_addr32) {
+ // Rewrite to nacltlsaddr32
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_TLS_addr32))
+ .addOperand(MI.getOperand(0)) // Base
+ .addOperand(MI.getOperand(1)) // Scale
+ .addOperand(MI.getOperand(2)) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, X86II::MO_TLSGD)
+ .addOperand(MI.getOperand(4)); // Segment
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // General Dynamic NaCl TLS model
+ // http://code.google.com/p/nativeclient/issues/detail?id=1685
+ if (Opc == X86::NACL_CG_GD_TLS_addr64) {
+
+ // Rewrite to:
+ // leaq $sym@TLSGD(%rip), %rdi
+ // naclcall __tls_get_addr@PLT
+ BuildMI(MBB, MBBI, DL, TII->get(X86::LEA64r), X86::RDI)
+ .addReg(X86::RIP) // Base
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
+ MI.getOperand(3).getTargetFlags())
+ .addReg(0); // Segment
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_CALL64d))
+ .addExternalSymbol("__tls_get_addr", X86II::MO_PLT);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Local Exec NaCl TLS Model
+ if (Opc == X86::NACL_CG_LE_TLS_addr64 ||
+ Opc == X86::NACL_CG_LE_TLS_addr32) {
+ unsigned CallOpc, LeaOpc, Reg;
+ // Rewrite to:
+ // naclcall __nacl_read_tp@PLT
+ // lea $sym@flag(,%reg), %reg
+ if (Opc == X86::NACL_CG_LE_TLS_addr64) {
+ CallOpc = X86::NACL_CALL64d;
+ LeaOpc = X86::LEA64r;
+ Reg = X86::RAX;
+ } else {
+ CallOpc = X86::NACL_CALL32d;
+ LeaOpc = X86::LEA32r;
+ Reg = X86::EAX;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(CallOpc))
+ .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT);
+ BuildMI(MBB, MBBI, DL, TII->get(LeaOpc), Reg)
+ .addReg(0) // Base
+ .addImm(1) // Scale
+ .addReg(Reg) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
+ MI.getOperand(3).getTargetFlags())
+ .addReg(0); // Segment
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Initial Exec NaCl TLS Model
+ if (Opc == X86::NACL_CG_IE_TLS_addr64 ||
+ Opc == X86::NACL_CG_IE_TLS_addr32) {
+ unsigned CallOpc, AddOpc, Base, Reg;
+ // Rewrite to:
+ // naclcall __nacl_read_tp@PLT
+ // addq sym@flag(%base), %reg
+ if (Opc == X86::NACL_CG_IE_TLS_addr64) {
+ CallOpc = X86::NACL_CALL64d;
+ AddOpc = X86::ADD64rm;
+ Base = X86::RIP;
+ Reg = X86::RAX;
+ } else {
+ CallOpc = X86::NACL_CALL32d;
+ AddOpc = X86::ADD32rm;
+ Base = MI.getOperand(3).getTargetFlags() == X86II::MO_INDNTPOFF ?
+ 0 : X86::EBX; // EBX for GOTNTPOFF.
+ Reg = X86::EAX;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(CallOpc))
+ .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT);
+ BuildMI(MBB, MBBI, DL, TII->get(AddOpc), Reg)
+ .addReg(Reg)
+ .addReg(Base)
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
+ MI.getOperand(3).getTargetFlags())
+ .addReg(0); // Segment
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+// Rewrite the sequence generated to implement CopyToReg for EFLAGS, when
+// LLVM tries to keep EFLAGS live across a call to avoid emitting a CMP.
+// %r/m = <some flags-setting op>
+// pushf
+// pop %rY
+// <call>
+// push %rY
+// popf
+// <conditional branch>
+// becomes:
+// %r/m = <some flags-setting op>
+// %rY = %r/m
+// <call>
+// cmp %rY, 0
+// <conditional branch>
+// A proper fix would involve fixing X86TargetLowering::EmitTest to check
+// that the path to the flags-setting op does not chain through a call,
+// and avoid the optimization in that case.
+// BUG: http://code.google.com/p/nativeclient/issues/detail?id=2711
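+// In the rewritten sequence, "cmp %rY, 0" only re-derives the flags of a
+// test of the saved value against zero; this matches the EmitTest pattern
+// being worked around here, but it is not a general EFLAGS save/restore
+// (our reading of the pattern, not a guarantee for other producers).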
+
+bool X86NaClRewritePass::RewritePushfPopf(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator *Next) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+ bool Is64Bit = false;
+
+ switch(Opc) {
+ case X86::PUSHF64:
+ Is64Bit = true;
+ // fall through
+ case X86::PUSHF32: {
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ assert((*Next)->getOpcode() == (Is64Bit ? X86::POP64r : X86::POP32r)
+ && "Unknown pushf sequence");
+ // Take the destination of the flags-setting op (Prev) and move it to
+ // the destination of the pop (Next)
+ int MovOpc;
+ if (Prev->memoperands_empty()) {
+ MovOpc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
+ BuildMI(MBB, MBBI, DL, TII->get(MovOpc))
+ .addOperand((*Next)->getOperand(0))
+ .addOperand(Prev->getOperand(0));
+ } else {
+ MovOpc = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
+ // Memory operands are an operand tuple of
+ // [base,scale,index,disp,segment]
+ BuildMI(MBB, MBBI, DL, TII->get(MovOpc))
+ .addOperand((*Next)->getOperand(0))
+ .addOperand(Prev->getOperand(0))
+ .addOperand(Prev->getOperand(1))
+ .addOperand(Prev->getOperand(2))
+ .addOperand(Prev->getOperand(3))
+ .addOperand(Prev->getOperand(4))
+ .addMemOperand(*Prev->memoperands_begin());
+ }
+
+ MI.eraseFromParent();
+ // Just use Prev as a placeholder to delete the pop
+ Prev = *Next;
+ ++(*Next);
+ Prev->eraseFromParent();
+ return true;
+ }
+ case X86::POPF64:
+ Is64Bit = true;
+ // fall through
+ case X86::POPF32: {
+ int PushOpc;
+ int CmpOpc;
+ PushOpc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
+ CmpOpc = Is64Bit ? X86::CMP64ri32 : X86::CMP32ri;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ // Create a compare of the destination of the push (Prev) to 0
+ assert(Prev->getOpcode() == PushOpc && "Unknown popf sequence");
+ BuildMI(MBB, MBBI, DL, TII->get(CmpOpc))
+ .addReg(Prev->getOperand(0).getReg())
+ .addImm(0);
+ Prev->eraseFromParent();
+ MI.eraseFromParent();
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+bool X86NaClRewritePass::AlignJumpTableTargets(MachineFunction &MF) {
+  // The function alignment is always (re)set below, so this always
+  // reports a modification.
+  bool Modified = true;
+
+  MF.setAlignment(5); // log2 alignment: 2^5 = 32-byte bundles
+
+  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+  if (JTI != NULL) {
+    const std::vector<MachineJumpTableEntry> &JT = JTI->getJumpTables();
+    for (unsigned i = 0; i < JT.size(); ++i) {
+      const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+      for (unsigned j = 0; j < MBBs.size(); ++j)
+        MBBs[j]->setAlignment(5);
+    }
+  }
+  return Modified;
+}
+
+bool X86NaClRewritePass::runOnMachineFunction(MachineFunction &MF) {
+ bool Modified = false;
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ Subtarget = &TM->getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+
+ assert(Subtarget->isTargetNaCl() && "Unexpected target in NaClRewritePass!");
+
+ DEBUG(dbgs() << "*************** NaCl Rewrite Pass ***************\n");
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E;
+ ++MFI) {
+ Modified |= runOnMachineBasicBlock(*MFI);
+ }
+ Modified |= AlignJumpTableTargets(MF);
+ DEBUG(dbgs() << "*************** NaCl Rewrite DONE ***************\n");
+ return Modified;
+}
+
+bool X86NaClRewritePass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ if (MBB.hasAddressTaken()) {
+    // FIXME: Use a symbolic constant, or get this value from configuration.
+ MBB.setAlignment(5);
+ Modified = true;
+ }
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI;
+ MBBI != MBB.end(); MBBI = NextMBBI) {
+ ++NextMBBI;
+ // When one of these methods makes a change,
+ // it returns true, skipping the others.
+ if (ApplyRewrites(MBB, MBBI) ||
+ RewritePushfPopf(MBB, MBBI, &NextMBBI) ||
+ (Is64Bit && ApplyStackSFI(MBB, MBBI)) ||
+ (Is64Bit && ApplyMemorySFI(MBB, MBBI)) ||
+ (Is64Bit && ApplyFrameSFI(MBB, MBBI)) ||
+ ApplyControlSFI(MBB, MBBI)) {
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+static bool IsSandboxed(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ // 32-bit
+ case X86::NACL_TRAP32:
+ case X86::NACL_RET32:
+ case X86::NACL_RETI32:
+ case X86::NACL_JMP32r:
+ case X86::NACL_CALL32d:
+ case X86::NACL_CALL32r:
+
+ // 64-bit
+ case X86::NACL_TRAP64:
+ case X86::NACL_RET64:
+ case X86::NACL_JMP64r:
+ case X86::NACL_CALL64r:
+ case X86::NACL_CALL64d:
+
+ case X86::NACL_ASPi8:
+ case X86::NACL_ASPi32:
+ case X86::NACL_SSPi8:
+ case X86::NACL_SSPi32:
+ case X86::NACL_SPADJi32:
+ case X86::NACL_RESTSPr:
+ case X86::NACL_RESTSPm:
+ case X86::NACL_RESTBPr:
+ case X86::NACL_RESTBPm:
+ return true;
+
+ case X86::MOV64rr:
+ // copy from safe regs
+ const MachineOperand &DestReg = MI.getOperand(0);
+ const MachineOperand &SrcReg = MI.getOperand(1);
+ return DestReg.getReg() == X86::RSP && SrcReg.getReg() == X86::RBP;
+ }
+ return false;
+}
+
+static void DumpInstructionVerbose(const MachineInstr &MI) {
+ dbgs() << MI;
+ dbgs() << MI.getNumOperands() << " operands:" << "\n";
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand& op = MI.getOperand(i);
+ dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n";
+ }
+ dbgs() << "\n";
+}
+
+/// createX86NaClRewritePassPass - returns an instance of the pass.
+namespace llvm {
+ FunctionPass* createX86NaClRewritePass() {
+ return new X86NaClRewritePass();
+ }
+}
diff --git a/lib/Wrap/LLVMBuild.txt b/lib/Wrap/LLVMBuild.txt
new file mode 100644
index 0000000000..8750711338
--- /dev/null
+++ b/lib/Wrap/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./lib/Wrap/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Wrap
+parent = Libraries
diff --git a/lib/Wrap/Makefile b/lib/Wrap/Makefile
new file mode 100644
index 0000000000..79aa2b3531
--- /dev/null
+++ b/lib/Wrap/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Wrap/Makefile -----------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMWrap
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Wrap/bitcode_wrapperer.cpp b/lib/Wrap/bitcode_wrapperer.cpp
new file mode 100644
index 0000000000..eeb2825793
--- /dev/null
+++ b/lib/Wrap/bitcode_wrapperer.cpp
@@ -0,0 +1,355 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#include "llvm/Wrap/bitcode_wrapperer.h"
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+using std::vector;
+
+// The number of bytes in a 32-bit integer.
+static const uint32_t kWordSize = 4;
+
+// Number of LLVM-defined fixed fields in the header.
+static const uint32_t kLLVMFields = 4;
+
+// Total number of fixed fields in the header.
+static const uint32_t kFixedFields = 7;
+
+// The magic number that must exist for bitcode wrappers.
+static const uint32_t kWrapperMagicNumber = 0x0B17C0DE;
+
+// The version number associated with a wrapper file.
+// Note: llvm currently only allows the value 0. When this changes,
+// we should consider making this a command line option.
+static const uint32_t kLLVMVersionNumber = 0;
+
+// Fields defined by Android bitcode header.
+static const uint32_t kAndroidHeaderVersion = 0;
+static const uint32_t kAndroidTargetAPI = 0;
+static const uint32_t kAndroidDefaultCompilerVersion = 0;
+static const uint32_t kAndroidDefaultOptimizationLevel = 3;
+
+// PNaCl bitcode version number.
+static const uint32_t kPnaclBitcodeVersion = 0;
+
+// Max size for variable fields. Currently only used for writing them
+// out to files (the parsing works for arbitrary sizes).
+static const size_t kMaxVariableFieldSize = 256;
+
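+// On-disk wrapper header layout, as written out by
+// WriteBitcodeWrapperHeader below (each field is one little-endian
+// 32-bit word):
+//   word 0:  wrapper magic           (kWrapperMagicNumber)
+//   word 1:  LLVM wrapper version    (kLLVMVersionNumber)
+//   word 2:  offset of the raw bitcode in the file
+//   word 3:  size of the raw bitcode
+//   word 4:  Android header version
+//   word 5:  Android target API
+//   word 6:  PNaCl bitcode version
+//   word 7+: variable-length BCHeaderField records, if any
+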
+BitcodeWrapperer::BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile)
+ : infile_(infile),
+ outfile_(outfile),
+ buffer_size_(0),
+ cursor_(0),
+ infile_at_eof_(false),
+ infile_bc_offset_(0),
+ wrapper_bc_offset_(0),
+ wrapper_bc_size_(0),
+ android_header_version_(kAndroidHeaderVersion),
+ android_target_api_(kAndroidTargetAPI),
+ pnacl_bc_version_(0),
+ error_(false) {
+ buffer_.resize(kBitcodeWrappererBufferSize);
+ if (IsInputBitcodeWrapper()) {
+ ParseWrapperHeader();
+ } else if (IsInputBitcodeFile()) {
+ wrapper_bc_offset_ = kWordSize * kFixedFields;
+ wrapper_bc_size_ = GetInFileSize();
+ } else {
+ fprintf(stderr, "Error: input file is not a bitcode file.\n");
+ error_ = true;
+ }
+}
+
+BitcodeWrapperer::~BitcodeWrapperer() {
+  for (size_t i = 0; i < variable_field_data_.size(); i++) {
+ delete [] variable_field_data_[i];
+ }
+}
+
+
+void BitcodeWrapperer::ClearBuffer() {
+ buffer_size_ = 0;
+ cursor_ = 0;
+ infile_at_eof_ = false;
+}
+
+bool BitcodeWrapperer::Seek(uint32_t pos) {
+ if (infile_ != NULL && infile_->Seek(pos)) {
+ ClearBuffer();
+ return true;
+ }
+ return false;
+}
+
+bool BitcodeWrapperer::CanReadWord() {
+ if (GetBufferUnreadBytes() < kWordSize) {
+ FillBuffer();
+ return GetBufferUnreadBytes() >= kWordSize;
+ } else {
+ return true;
+ }
+}
+
+void BitcodeWrapperer::FillBuffer() {
+ if (cursor_ > 0) {
+    // Before filling, move any remaining unread bytes to the
+    // front of the buffer, so that after the call to FillBuffer
+    // the readable data is contiguous from index 0.
+ if (cursor_ < buffer_size_) {
+ size_t i = 0;
+ while (cursor_ < buffer_size_) {
+ buffer_[i++] = buffer_[cursor_++];
+ }
+ cursor_ = 0;
+ buffer_size_ = i;
+ }
+ } else {
+ // Assume the buffer contents have been used,
+ // and we want to completely refill it.
+ buffer_size_ = 0;
+ }
+
+ // If we don't have an input, we can't refill the buffer at all.
+ if (infile_ == NULL) {
+ return;
+ }
+
+ // Now fill in remaining space.
+ size_t needed = buffer_.size() - buffer_size_;
+
+ while (buffer_.size() > buffer_size_) {
+ int actually_read = infile_->Read(&buffer_[buffer_size_], needed);
+ if (infile_->AtEof()) {
+ infile_at_eof_ = true;
+ }
+ if (actually_read) {
+ buffer_size_ += actually_read;
+ needed -= actually_read;
+ } else if (infile_at_eof_) {
+ break;
+ }
+ }
+}
+
+bool BitcodeWrapperer::ReadWord(uint32_t& word) {
+ if (!CanReadWord()) return false;
+ word = (((uint32_t) BufferLookahead(0)) << 0)
+ | (((uint32_t) BufferLookahead(1)) << 8)
+ | (((uint32_t) BufferLookahead(2)) << 16)
+ | (((uint32_t) BufferLookahead(3)) << 24);
+ cursor_ += kWordSize;
+ return true;
+}
+
+bool BitcodeWrapperer::WriteWord(uint32_t value) {
+ uint8_t buffer[kWordSize];
+ buffer[3] = (value >> 24) & 0xFF;
+ buffer[2] = (value >> 16) & 0xFF;
+ buffer[1] = (value >> 8) & 0xFF;
+ buffer[0] = (value >> 0) & 0xFF;
+ return outfile_->Write(buffer, kWordSize);
+}
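+
+// Note that ReadWord and WriteWord use a fixed little-endian byte order
+// regardless of host endianness: the wrapper magic 0x0B17C0DE, for
+// example, appears on disk as the bytes DE C0 17 0B.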
+
+bool BitcodeWrapperer::WriteVariableFields() {
+ // This buffer may have to be bigger if we start using the fields
+ // for larger things.
+ uint8_t buffer[kMaxVariableFieldSize];
+ for (vector<BCHeaderField>::iterator it = header_fields_.begin();
+ it != header_fields_.end(); ++it) {
+ if (!it->Write(buffer, kMaxVariableFieldSize) ||
+ !outfile_->Write(buffer, it->GetTotalSize())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool BitcodeWrapperer::ParseWrapperHeader() {
+ // Make sure LLVM-defined fields have been parsed
+ if (!IsInputBitcodeWrapper()) return false;
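+  // (That call also advances the read cursor past the four LLVM-defined
+  // words, so the reads below pick up words 4..6 of the fixed header.)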
+ // Check the android/pnacl fields
+ if (!ReadWord(android_header_version_) ||
+ !ReadWord(android_target_api_) || !ReadWord(pnacl_bc_version_)) {
+ fprintf(stderr, "Error: file not long enough to contain header\n");
+ return false;
+ }
+ if (pnacl_bc_version_ != kPnaclBitcodeVersion) {
+ fprintf(stderr, "Error: bad PNaCl Bitcode version\n");
+ return false;
+ }
+ int field_data_total = wrapper_bc_offset_ - kWordSize * kFixedFields;
+ if (field_data_total > 0) {
+ // Read in the variable fields. We need to allocate space for the data.
+ int field_data_read = 0;
+
+ while (field_data_read < field_data_total) {
+ FillBuffer();
+ size_t buffer_needed = BCHeaderField::GetDataSizeFromSerialized(
+ &buffer_[cursor_]);
+ if (buffer_needed > buffer_.size()) {
+ buffer_.resize(buffer_needed +
+ sizeof(BCHeaderField::FixedSubfield) * 2);
+ FillBuffer();
+ }
+ variable_field_data_.push_back(new uint8_t[buffer_needed]);
+
+ BCHeaderField field(BCHeaderField::kInvalid, 0,
+ variable_field_data_.back());
+ field.Read(&buffer_[cursor_], buffer_size_);
+ header_fields_.push_back(field);
+ size_t field_size = field.GetTotalSize();
+ cursor_ += field_size;
+ field_data_read += field_size;
+ if (field_data_read > field_data_total) {
+        // We read too much data; the header is corrupted.
+ fprintf(stderr, "Error: raw bitcode offset inconsistent with "
+ "variable field data\n");
+ return false;
+ }
+ }
+ Seek(0);
+ }
+ return true;
+}
+
+bool BitcodeWrapperer::IsInputBitcodeWrapper() {
+ ResetCursor();
+ // First make sure that there are enough words (LLVM header)
+ // to peek at.
+ if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) {
+ FillBuffer();
+ if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) return false;
+ }
+
+ // Now make sure the magic number is right.
+ uint32_t first_word;
+ if ((!ReadWord(first_word)) ||
+ (kWrapperMagicNumber != first_word)) return false;
+
+ // Make sure the version is right.
+ uint32_t second_word;
+ if ((!ReadWord(second_word)) ||
+ (kLLVMVersionNumber != second_word)) return false;
+
+ // Make sure that the offset and size (for llvm) is defined.
+ uint32_t bc_offset;
+ uint32_t bc_size;
+ if (ReadWord(bc_offset) &&
+ ReadWord(bc_size)) {
+ // Before returning, save the extracted values.
+ wrapper_bc_offset_ = bc_offset;
+ infile_bc_offset_ = bc_offset;
+ wrapper_bc_size_ = bc_size;
+ return true;
+ }
+ // If reached, unable to read wrapped header.
+ return false;
+}
+
+bool BitcodeWrapperer::IsInputBitcodeFile() {
+ ResetCursor();
+ // First make sure that there are four bytes to peek at.
+ if (GetBufferUnreadBytes() < kWordSize) {
+ FillBuffer();
+ if (GetBufferUnreadBytes() < kWordSize) return false;
+ }
+  // If we get here, check whether the first 4 bytes match the bitcode
+  // file magic number.
+ return (BufferLookahead(0) == 'B') &&
+ (BufferLookahead(1) == 'C') &&
+ (BufferLookahead(2) == 0xc0) &&
+ (BufferLookahead(3) == 0xde);
+}
+
+bool BitcodeWrapperer::BufferCopyInToOut(uint32_t size) {
+ while (size > 0) {
+ // Be sure buffer is non-empty before writing.
+ if (0 == buffer_size_) {
+ FillBuffer();
+ if (0 == buffer_size_) {
+ return false;
+ }
+ }
+    // Copy the buffered bytes to the output file.
+ size_t block = (buffer_size_ < size) ? buffer_size_ : size;
+ if (!outfile_->Write(&buffer_[cursor_], block)) return false;
+ size -= block;
+ buffer_size_ = 0;
+ }
+  // Make sure there are no more bytes on the input stream.
+ FillBuffer();
+ return buffer_size_ == 0;
+}
+
+void BitcodeWrapperer::AddHeaderField(BCHeaderField* field) {
+ vector<BCHeaderField>::iterator it = header_fields_.begin();
+ for (; it != header_fields_.end(); ++it) {
+ // If this field is the same as an existing one, overwrite it.
+ if (it->getID() == field->getID()) {
+ wrapper_bc_offset_ += (field->GetTotalSize() - it->GetTotalSize());
+ *it = *field;
+ break;
+ }
+ }
+ if (it == header_fields_.end()) { // there was no match, add a new field
+ header_fields_.push_back(*field);
+ wrapper_bc_offset_ += field->GetTotalSize();
+ }
+}
+
+bool BitcodeWrapperer::WriteBitcodeWrapperHeader() {
+ return
+      // Note: This writes out the 4-word header required by LLVM wrapped
+      // bitcode.
+ WriteWord(kWrapperMagicNumber) &&
+ WriteWord(kLLVMVersionNumber) &&
+ WriteWord(wrapper_bc_offset_) &&
+ WriteWord(wrapper_bc_size_) &&
+ // 2 fixed fields defined by Android
+ WriteWord(android_header_version_) &&
+ WriteWord(android_target_api_) &&
+ // PNaClBitcode version
+ WriteWord(kPnaclBitcodeVersion) &&
+ // Common variable-length fields
+ WriteVariableFields();
+}
+
+void BitcodeWrapperer::PrintWrapperHeader() {
+ if (error_) {
+ fprintf(stderr, "Error condition exists: the following"
+ "data may not be reliable\n");
+ }
+ fprintf(stderr, "Wrapper magic:\t\t%x\n", kWrapperMagicNumber);
+ fprintf(stderr, "LLVM Bitcode version:\t%d\n", kLLVMVersionNumber);
+ fprintf(stderr, "Raw bitcode offset:\t%d\n", wrapper_bc_offset_);
+ fprintf(stderr, "Raw bitcode size:\t%d\n", wrapper_bc_size_);
+ fprintf(stderr, "Android header version:\t%d\n", android_header_version_);
+ fprintf(stderr, "Android target API:\t%d\n", android_target_api_);
+ fprintf(stderr, "PNaCl bitcode version:\t%d\n", kPnaclBitcodeVersion);
+ for (size_t i = 0; i < header_fields_.size(); i++) header_fields_[i].Print();
+}
+
+bool BitcodeWrapperer::GenerateWrappedBitcodeFile() {
+ if (!error_ &&
+ WriteBitcodeWrapperHeader() &&
+ Seek(infile_bc_offset_) &&
+ BufferCopyInToOut(wrapper_bc_size_)) {
+ off_t dangling = wrapper_bc_size_ & 3;
+ if (dangling) {
+ return outfile_->Write((const uint8_t*) "\0\0\0\0", 4 - dangling);
+ }
+ return true;
+ }
+ return false;
+}
+
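+// Typical usage (a sketch; FileWrapperInput and FileWrapperOutput are the
+// file-backed WrapperInput/WrapperOutput implementations in this library):
+//   FileWrapperInput in("input.bc");
+//   FileWrapperOutput out("wrapped.bc");
+//   BitcodeWrapperer wrapperer(&in, &out);
+//   if (!wrapperer.GenerateWrappedBitcodeFile())
+//     fprintf(stderr, "wrapping failed\n");
+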
+bool BitcodeWrapperer::GenerateRawBitcodeFile() {
+ return !error_ && Seek(infile_bc_offset_) &&
+ BufferCopyInToOut(wrapper_bc_size_);
+}
diff --git a/lib/Wrap/file_wrapper_input.cpp b/lib/Wrap/file_wrapper_input.cpp
new file mode 100644
index 0000000000..fc592e0246
--- /dev/null
+++ b/lib/Wrap/file_wrapper_input.cpp
@@ -0,0 +1,53 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#include <sys/stat.h>
+#include <stdlib.h>
+
+#include "llvm/Wrap/file_wrapper_input.h"
+
+FileWrapperInput::FileWrapperInput(const std::string& name) :
+ _name(name), _at_eof(false), _size_found(false), _size(0) {
+ _file = fopen(name.c_str(), "rb");
+ if (NULL == _file) {
+ fprintf(stderr, "Unable to open: %s\n", name.c_str());
+ exit(1);
+ }
+}
+
+FileWrapperInput::~FileWrapperInput() {
+ fclose(_file);
+}
+
+size_t FileWrapperInput::Read(uint8_t* buffer, size_t wanted) {
+ size_t found = fread((char*) buffer, 1, wanted, _file);
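+  // Treat read errors like end-of-file so callers stop requesting input.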
+ if (feof(_file) || ferror(_file)) {
+ _at_eof = true;
+ }
+ return found;
+}
+
+bool FileWrapperInput::AtEof() {
+ return _at_eof;
+}
+
+off_t FileWrapperInput::Size() {
+ if (_size_found) return _size;
+ struct stat st;
+ if (0 == stat(_name.c_str(), &st)) {
+ _size_found = true;
+ _size = st.st_size;
+ return _size;
+ } else {
+ fprintf(stderr, "Unable to compute file size: %s\n", _name.c_str());
+ exit(1);
+ }
+ // NOT REACHABLE.
+ return 0;
+}
+
+bool FileWrapperInput::Seek(uint32_t pos) {
+ return 0 == fseek(_file, (long) pos, SEEK_SET);
+}
diff --git a/lib/Wrap/file_wrapper_output.cpp b/lib/Wrap/file_wrapper_output.cpp
new file mode 100644
index 0000000000..f9f126868d
--- /dev/null
+++ b/lib/Wrap/file_wrapper_output.cpp
@@ -0,0 +1,37 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#include "llvm/Wrap/file_wrapper_output.h"
+#include <stdlib.h>
+
+
+FileWrapperOutput::FileWrapperOutput(const std::string& name)
+ : _name(name) {
+ _file = fopen(name.c_str(), "wb");
+ if (NULL == _file) {
+ fprintf(stderr, "Unable to open: %s\n", name.c_str());
+ exit(1);
+ }
+}
+
+FileWrapperOutput::~FileWrapperOutput() {
+ fclose(_file);
+}
+
+bool FileWrapperOutput::Write(uint8_t byte) {
+ return EOF != fputc(byte, _file);
+}
+
+bool FileWrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) {
+ if (!buffer) {
+ return false;
+ }
+
+ if (buffer_size > 0) {
+ return buffer_size == fwrite(buffer, 1, buffer_size, _file);
+ } else {
+ return true;
+ }
+}
diff --git a/lib/Wrap/wrapper_output.cpp b/lib/Wrap/wrapper_output.cpp
new file mode 100644
index 0000000000..493f29efa8
--- /dev/null
+++ b/lib/Wrap/wrapper_output.cpp
@@ -0,0 +1,9 @@
+#include "llvm/Wrap/wrapper_output.h"
+
+bool WrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) {
+ // Default implementation that uses the byte write routine.
+ for (size_t i = 0; i < buffer_size; ++i) {
+ if (!Write(buffer[i])) return false;
+ }
+ return true;
+}