author     Derek Schuff <dschuff@chromium.org>  2012-07-03 15:48:25 -0700
committer  Derek Schuff <dschuff@chromium.org>  2012-07-03 15:48:25 -0700
commit     4f429c8b4e06d750b5464b6eafdd102af5196bdd (patch)
tree       22a752c4654e3ab9e94c09739f7fb8f9e705433d
parent     e91f926f3b76774aa7ed4c327fbde6a39e42c87f (diff)
Diff from hg rev 0b098ca44de7
-rw-r--r--  include/llvm/ExecutionEngine/NaClJITMemoryManager.h | 237
-rw-r--r--  include/llvm/Support/support_macros.h | 25
-rw-r--r--  include/llvm/Wrap/BCHeaderField.h | 106
-rw-r--r--  include/llvm/Wrap/bitcode_wrapperer.h | 192
-rw-r--r--  include/llvm/Wrap/file_wrapper_input.h | 48
-rw-r--r--  include/llvm/Wrap/file_wrapper_output.h | 34
-rw-r--r--  include/llvm/Wrap/wrapper_input.h | 38
-rw-r--r--  include/llvm/Wrap/wrapper_output.h | 34
-rw-r--r--  lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp | 429
-rw-r--r--  lib/Target/ARM/ARMInstrNaCl.td | 145
-rw-r--r--  lib/Target/ARM/ARMNaClHeaders.cpp | 192
-rw-r--r--  lib/Target/ARM/ARMNaClRewritePass.cpp | 755
-rw-r--r--  lib/Target/ARM/ARMNaClRewritePass.h | 36
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp | 329
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h | 19
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp | 261
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 19
-rw-r--r--  lib/Target/Mips/MipsNaClHeaders.cpp | 128
-rw-r--r--  lib/Target/Mips/MipsNaClRewritePass.cpp | 333
-rw-r--r--  lib/Target/Mips/MipsNaClRewritePass.h | 21
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp | 803
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCNaCl.h | 19
-rw-r--r--  lib/Target/X86/X86InstrNaCl.td | 433
-rw-r--r--  lib/Target/X86/X86NaClJITInfo.cpp | 393
-rw-r--r--  lib/Target/X86/X86NaClJITInfo.h | 75
-rw-r--r--  lib/Target/X86/X86NaClRewriteFinalPass.cpp | 236
-rw-r--r--  lib/Target/X86/X86NaClRewritePass.cpp | 869
-rw-r--r--  lib/Wrap/LLVMBuild.txt | 21
-rw-r--r--  lib/Wrap/Makefile | 14
-rw-r--r--  lib/Wrap/bitcode_wrapperer.cpp | 355
-rw-r--r--  lib/Wrap/file_wrapper_input.cpp | 53
-rw-r--r--  lib/Wrap/file_wrapper_output.cpp | 37
-rw-r--r--  lib/Wrap/wrapper_output.cpp | 9
-rw-r--r--  tools/bc-wrap/LLVMBuild.txt | 22
-rw-r--r--  tools/bc-wrap/Makefile | 20
-rw-r--r--  tools/bc-wrap/bc_wrap.cpp | 123
-rw-r--r--  tools/llc/ELFStub.h | 55
-rw-r--r--  tools/llc/SRPCStreamer.cpp | 116
-rw-r--r--  tools/llc/SRPCStreamer.h | 93
-rw-r--r--  tools/llc/StubMaker.cpp | 233
-rw-r--r--  tools/llc/StubMaker.h | 20
-rw-r--r--  tools/llc/TextStubWriter.cpp | 84
-rw-r--r--  tools/llc/TextStubWriter.h | 12
-rw-r--r--  tools/llc/nacl_file.cpp | 548
-rw-r--r--  tools/pso-stub/CMakeLists.txt | 5
-rw-r--r--  tools/pso-stub/LLVMBuild.txt | 22
-rw-r--r--  tools/pso-stub/Makefile | 18
-rw-r--r--  tools/pso-stub/pso-stub.cpp | 307
48 files changed, 8376 insertions, 0 deletions
diff --git a/include/llvm/ExecutionEngine/NaClJITMemoryManager.h b/include/llvm/ExecutionEngine/NaClJITMemoryManager.h
new file mode 100644
index 0000000000..dcd06627df
--- /dev/null
+++ b/include/llvm/ExecutionEngine/NaClJITMemoryManager.h
@@ -0,0 +1,237 @@
+//=-- NaClJITMemoryManager.h - Interface JIT uses to Allocate Mem -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_EXECUTION_ENGINE_NACL_JIT_MEMMANAGER_H
+#define LLVM_EXECUTION_ENGINE_NACL_JIT_MEMMANAGER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class Function;
+class GlobalValue;
+
+struct SimpleSlab {
+ uint8_t *address;
+ size_t size;
+ uint8_t *next_free;
+};
+
+struct FreeListNode {
+ uint8_t *address;
+ uintptr_t size;
+ FreeListNode *Prev;
+ FreeListNode *Next;
+ FreeListNode *RemoveFromFreeList() {
+ assert(Next->Prev == this && Prev->Next == this && "Freelist broken!");
+ Next->Prev = Prev;
+ return Prev->Next = Next;
+ }
+ void AddToFreeList(FreeListNode *FreeList) {
+ Next = FreeList;
+ Prev = FreeList->Prev;
+ Prev->Next = this;
+ Next->Prev = this;
+ }
+};
+
+class NaClJITMemoryManager : public JITMemoryManager {
+  // NaCl disallows writing into any code region, and disallows executing any
+  // data region. Thus we can never get any RWX memory, and the strategy
+  // used by the other allocators (colocating allocation metadata
+  // with the allocated code) won't work.
+  // Currently with NaCl we have one single pool of usable space, between the
+  // text and rodata segments, defined by the linker.
+  // To support stub allocation in the middle of a function, we allocate
+  // stubs in slabs interspersed with the functions.
+
+ static const size_t kStubSlabSize = 16 * 1024;
+ static const size_t kDataSlabSize = 16 * 1024;
+ static const size_t kCodeSlabSize = 64 * 1024;
+
+ typedef DenseMap<uint8_t *, size_t> AllocationTable;
+
+ uint8_t *AllocatableRegionStart;
+ uint8_t *AllocatableRegionLimit;
+ uint8_t *NextCode;
+ SimpleSlab CurrentStubSlab;
+
+ // Allocation metadata must be kept separate from code, so the free list is
+ // allocated with new rather than being a header in the code blocks
+ FreeListNode *CodeFreeListHead;
+ FreeListNode *CurrentCodeBlock;
+  // Maps the pointer to each allocated function to the size of its allocation
+ AllocationTable AllocatedFunctions;
+
+  // Since exception tables are allocated like functions (i.e. we don't know
+  // ahead of time how large they are), we use the same allocation method for
+  // simplicity, even though it's not strictly necessary to separate the
+  // allocation metadata from the allocated data.
+ FreeListNode *DataFreeListHead;
+ FreeListNode *CurrentDataBlock;
+ AllocationTable AllocatedTables;
+ BumpPtrAllocator DataAllocator;
+
+ uint8_t *GOTBase; // Target Specific reserved memory
+
+ FreeListNode *allocateCodeSlab(size_t MinSize);
+ FreeListNode *allocateDataSlab(size_t MinSize);
+ SimpleSlab allocateStubSlab(size_t MinSize);
+
+ // Functions for allocations using one of the free lists
+ void InitFreeList(FreeListNode **Head);
+ void DestroyFreeList(FreeListNode *Head);
+ FreeListNode *FreeListAllocate(uintptr_t &ActualSize, FreeListNode *Head,
+ FreeListNode * (NaClJITMemoryManager::*allocate)(size_t));
+ void FreeListFinishAllocation(FreeListNode *Block, FreeListNode *Head,
+ uint8_t *AllocationStart, uint8_t *AllocationEnd, AllocationTable &table);
+ void FreeListDeallocate(FreeListNode *Head, AllocationTable &Table,
+ void *Body);
+ public:
+ // TODO(dschuff): how to find the real value? is it a flag?
+ static const int kBundleSize = 32;
+ static const intptr_t kJumpMask = -32;
+ NaClJITMemoryManager();
+ virtual ~NaClJITMemoryManager();
+ static inline bool classof(const JITMemoryManager*) { return true; }
+
+ /// setMemoryWritable - No-op on NaCl - code is never writable
+ virtual void setMemoryWritable() {}
+
+ /// setMemoryExecutable - No-op on NaCl - data is never executable
+ virtual void setMemoryExecutable() {}
+
+ /// setPoisonMemory - No-op on NaCl - nothing unvalidated is ever executable
+ virtual void setPoisonMemory(bool poison) {}
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function. As such it is only useful for resolving library
+ /// symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ virtual void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) ;
+
+ //===--------------------------------------------------------------------===//
+ // Global Offset Table Management
+ //===--------------------------------------------------------------------===//
+
+ /// AllocateGOT - If the current table requires a Global Offset Table, this
+ /// method is invoked to allocate it. This method is required to set HasGOT
+ /// to true.
+ virtual void AllocateGOT();
+
+ /// getGOTBase - If this is managing a Global Offset Table, this method should
+ /// return a pointer to its base.
+ virtual uint8_t *getGOTBase() const {
+ return GOTBase;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Main Allocation Functions
+ //===--------------------------------------------------------------------===//
+
+ /// startFunctionBody - When we start JITing a function, the JIT calls this
+ /// method to allocate a block of free RWX memory, which returns a pointer to
+ /// it. If the JIT wants to request a block of memory of at least a certain
+ /// size, it passes that value as ActualSize, and this method returns a block
+ /// with at least that much space. If the JIT doesn't know ahead of time how
+ /// much space it will need to emit the function, it passes 0 for the
+ /// ActualSize. In either case, this method is required to pass back the size
+ /// of the allocated block through ActualSize. The JIT will be careful to
+ /// not write more than the returned ActualSize bytes of memory.
+ virtual uint8_t *startFunctionBody(const Function *F,
+ uintptr_t &ActualSize);
+
+ /// allocateStub - This method is called by the JIT to allocate space for a
+ /// function stub (used to handle limited branch displacements) while it is
+ /// JIT compiling a function. For example, if foo calls bar, and if bar
+ /// either needs to be lazily compiled or is a native function that exists too
+ /// far away from the call site to work, this method will be used to make a
+ /// thunk for it. The stub should be "close" to the current function body,
+ /// but should not be included in the 'actualsize' returned by
+ /// startFunctionBody.
+ virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment);
+
+ /// endFunctionBody - This method is called when the JIT is done codegen'ing
+ /// the specified function. At this point we know the size of the JIT
+ /// compiled function. This passes in FunctionStart (which was returned by
+ /// the startFunctionBody method) and FunctionEnd which is a pointer to the
+ /// actual end of the function. This method should mark the space allocated
+ /// and remember where it is in case the client wants to deallocate it.
+ virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+ uint8_t *FunctionEnd);
+
+ /// allocateCodeSection - Allocate a memory block of (at least) the given
+ /// size suitable for executable code. The SectionID is a unique identifier
+ /// assigned by the JIT and passed through to the memory manager for
+ /// the instance class to use if it needs to communicate to the JIT about
+ /// a given section after the fact.
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID);
+
+ /// allocateDataSection - Allocate a memory block of (at least) the given
+ /// size suitable for data. The SectionID is a unique identifier
+ /// assigned by the JIT and passed through to the memory manager for
+ /// the instance class to use if it needs to communicate to the JIT about
+ /// a given section after the fact.
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID);
+
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
+ virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment);
+
+ /// allocateGlobal - Allocate memory for a global.
+ virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+ /// deallocateFunctionBody - Free the specified function body. The argument
+ /// must be the return value from a call to startFunctionBody() that hasn't
+ /// been deallocated yet. This is never called when the JIT is currently
+ /// emitting a function.
+ virtual void deallocateFunctionBody(void *Body);
+
+ /// startExceptionTable - When we finished JITing the function, if exception
+ /// handling is set, we emit the exception table.
+ virtual uint8_t* startExceptionTable(const Function* F,
+ uintptr_t &ActualSize);
+
+ /// endExceptionTable - This method is called when the JIT is done emitting
+ /// the exception table.
+ virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister);
+
+ /// deallocateExceptionTable - Free the specified exception table's memory.
+ /// The argument must be the return value from a call to startExceptionTable()
+ /// that hasn't been deallocated yet. This is never called when the JIT is
+ /// currently emitting an exception table.
+ virtual void deallocateExceptionTable(void *ET);
+
+ virtual size_t GetDefaultCodeSlabSize() {
+ return kCodeSlabSize;
+ }
+ virtual size_t GetDefaultDataSlabSize() {
+ return kDataSlabSize;
+ }
+ virtual size_t GetDefaultStubSlabSize() {
+ return kStubSlabSize;
+ }
+
+};
+
+}
+
+#endif // LLVM_EXECUTION_ENGINE_NACL_JIT_MEMMANAGER_H
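
The startFunctionBody/endFunctionBody contract above is two-phase: the manager hands back a block of at least ActualSize bytes, the JIT emits into it, and endFunctionBody returns the unused tail to the free list. A minimal sketch of a caller, assuming only the header above (the emit step and byte count are placeholders):

    #include "llvm/ExecutionEngine/NaClJITMemoryManager.h"

    using namespace llvm;

    // Sketch: drive one allocate/emit/finish cycle through the manager.
    static uint8_t *EmitOneFunction(NaClJITMemoryManager &MM, const Function *F) {
      uintptr_t ActualSize = 0;  // 0 lets the manager pick the slab size
      uint8_t *Start = MM.startFunctionBody(F, ActualSize);

      // ... emit machine code here, writing at most ActualSize bytes ...
      uint8_t *End = Start + 128;  // placeholder for the bytes actually used

      // Report the real end so the remainder of the block can be reused.
      MM.endFunctionBody(F, Start, End);
      return Start;
    }
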
diff --git a/include/llvm/Support/support_macros.h b/include/llvm/Support/support_macros.h
new file mode 100644
index 0000000000..83d62c722c
--- /dev/null
+++ b/include/llvm/Support/support_macros.h
@@ -0,0 +1,25 @@
+// Define support macros for defining classes, etc.
+
+#ifndef LLVM_SUPPORT_SUPPORT_MACROS_H__
+#define LLVM_SUPPORT_SUPPORT_MACROS_H__
+
+// Define macro, to use within a class declaration, to disallow constructor
+// copy. Defines copy constructor declaration under the assumption that it
+// is never defined.
+#define DISALLOW_CLASS_COPY(class_name) \
+ class_name(class_name& arg) // Do not implement
+
+// Define macro, to use within a class declaration, to disallow assignment.
+// Defines assignment operation declaration under the assumption that it
+// is never defined.
+#define DISALLOW_CLASS_ASSIGN(class_name) \
+ void operator=(class_name& arg) // Do not implement
+
+// Define macro to add copy and assignment declarations to a class file,
+// for which no bodies will be defined, effectively disallowing these from
+// being defined in the class.
+#define DISALLOW_CLASS_COPY_AND_ASSIGN(class_name) \
+ DISALLOW_CLASS_COPY(class_name); \
+ DISALLOW_CLASS_ASSIGN(class_name)
+
+#endif // LLVM_SUPPORT_SUPPORT_MACROS_H__
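
These macros are meant to sit in a class's private section; a small hypothetical user looks like this (ScopedThing is made up for illustration):

    #include "llvm/Support/support_macros.h"

    class ScopedThing {
     public:
      ScopedThing() {}
      ~ScopedThing() {}
     private:
      // Copy constructor and assignment are declared but never defined, so
      // accidental copies fail to compile (or to link, from within the class).
      DISALLOW_CLASS_COPY_AND_ASSIGN(ScopedThing);
    };
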
diff --git a/include/llvm/Wrap/BCHeaderField.h b/include/llvm/Wrap/BCHeaderField.h
new file mode 100644
index 0000000000..40a3714c9f
--- /dev/null
+++ b/include/llvm/Wrap/BCHeaderField.h
@@ -0,0 +1,106 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#ifndef LLVM_WRAP_BCHEADERFIELD_H
+#define LLVM_WRAP_BCHEADERFIELD_H
+#include <limits>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+// Class representing a variable-size metadata field in the bitcode header.
+// Also contains the list of known Tag IDs.
+// Contains a pointer to the data but does not own the data, so it can be
+// copied with the trivial copy constructor/assignment operator.
+
+// The serialized format has 2 fixed subfields (ID and length) and the
+// variable-length data subfield
+class BCHeaderField {
+ public:
+ typedef enum {
+ kInvalid = 0,
+ kBitcodeHash = 1,
+ kAndroidCompilerVersion = 0x4001,
+ kAndroidOptimizationLevel = 0x4002
+ } Tag;
+ typedef uint16_t FixedSubfield;
+
+ BCHeaderField(Tag ID, size_t len, uint8_t* data) :
+ ID_(ID), len_(len), data_(data) {}
+ size_t GetTotalSize() {
+ // Round up to 4 byte alignment
+ return (kTagLenSize + len_ + 3) & ~3;
+ }
+
+ bool Write(uint8_t* buf, size_t buf_len) {
+ size_t fields_len = kTagLenSize + len_;
+ size_t pad_len = (4 - (fields_len & 3)) & 3;
+ // Ensure buffer is large enough and that length can be represented
+ // in 16 bits
+ if (buf_len < fields_len + pad_len ||
+ len_ > std::numeric_limits<FixedSubfield>::max()) return false;
+
+ WriteFixedSubfield(static_cast<FixedSubfield>(ID_), buf);
+ WriteFixedSubfield(static_cast<FixedSubfield>(len_),
+ buf + sizeof(FixedSubfield));
+ memcpy(buf + kTagLenSize, data_, len_);
+ // Pad out to 4 byte alignment
+ if (pad_len) {
+ memset(buf + fields_len, 0, pad_len);
+ }
+ return true;
+ }
+
+ bool Read(const uint8_t* buf, size_t buf_len) {
+ if (buf_len < kTagLenSize) return false;
+ FixedSubfield field;
+ ReadFixedSubfield(&field, buf);
+ ID_ = static_cast<Tag>(field);
+ ReadFixedSubfield(&field, buf + sizeof(FixedSubfield));
+ len_ = static_cast<size_t>(field);
+ if (buf_len < kTagLenSize + len_) return false;
+ memcpy(data_, buf + kTagLenSize, len_);
+ return true;
+ }
+
+ void Print() {
+ fprintf(stderr, "Field ID: %d, data length %d, total length %d\n",
+ ID_, static_cast<int>(len_), static_cast<int>(GetTotalSize()));
+ fprintf(stderr, "Data: ");
+ for (size_t i = 0; i < len_; i++) fprintf(stderr, "%02x", data_[i]);
+ fprintf(stderr, "\n");
+ }
+
+ // Get the data size from a serialized field to allow allocation
+ static size_t GetDataSizeFromSerialized(const uint8_t* buf) {
+ FixedSubfield len;
+ ReadFixedSubfield(&len, buf + sizeof(FixedSubfield));
+ return len;
+ }
+
+ Tag getID() const {
+ return ID_;
+ }
+
+ size_t getLen() const {
+ return len_;
+ }
+
+ private:
+ // Combined size of the fixed subfields
+ const static size_t kTagLenSize = 2 * sizeof(FixedSubfield);
+ static void WriteFixedSubfield(FixedSubfield value, uint8_t* buf) {
+ buf[0] = value & 0xFF;
+ buf[1] = (value >> 8) & 0xFF;
+ }
+ static void ReadFixedSubfield(FixedSubfield* value, const uint8_t* buf) {
+ *value = buf[0] | buf[1] << 8;
+ }
+ Tag ID_;
+ size_t len_;
+ uint8_t *data_;
+};
+
+#endif
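
Both fixed subfields are written little-endian and the whole record is padded to a 4-byte boundary, as GetTotalSize computes. A short sketch of round-tripping one field through a byte buffer (the hash bytes are placeholders):

    #include "llvm/Wrap/BCHeaderField.h"
    #include <vector>

    bool RoundTripHashField() {
      uint8_t hash[4] = {0xde, 0xad, 0xbe, 0xef};  // placeholder data
      BCHeaderField field(BCHeaderField::kBitcodeHash, sizeof(hash), hash);

      std::vector<uint8_t> buf(field.GetTotalSize());
      if (!field.Write(&buf[0], buf.size())) return false;

      // Read() copies into the field's own data pointer, so the destination
      // must be large enough for the serialized length.
      uint8_t data[4];
      BCHeaderField parsed(BCHeaderField::kInvalid, sizeof(data), data);
      return parsed.Read(&buf[0], buf.size()) &&
             parsed.getID() == BCHeaderField::kBitcodeHash &&
             parsed.getLen() == sizeof(hash);
    }
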
diff --git a/include/llvm/Wrap/bitcode_wrapperer.h b/include/llvm/Wrap/bitcode_wrapperer.h
new file mode 100644
index 0000000000..89f2a4cbcc
--- /dev/null
+++ b/include/llvm/Wrap/bitcode_wrapperer.h
@@ -0,0 +1,192 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+// Define utility class to wrap/unwrap bitcode files. Does wrapping/unwrapping
+// in such a way that the wrappered bitcode file is still a bitcode file.
+
+#ifndef LLVM_WRAP_BITCODE_WRAPPERER_H__
+#define LLVM_WRAP_BITCODE_WRAPPERER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <vector>
+
+#include "llvm/Support/support_macros.h"
+#include "llvm/Wrap/BCHeaderField.h"
+#include "llvm/Wrap/wrapper_input.h"
+#include "llvm/Wrap/wrapper_output.h"
+
+// The bitcode wrapper header is the following 7 fixed 4-byte fields:
+// 1) 0B17C0DE - The magic number expected by llvm for wrapped bitcodes
+// 2) Version # 0 - The current version of wrapped bitcode files
+// 3) (raw) bitcode offset
+// 4) (raw) bitcode size
+// 5) Android header version
+// 6) Android target API
+// 7) PNaCl Bitcode version
+// plus 0 or more variable-length fields (consisting of ID, length, data)
+
+// Initial buffer size. It is expanded if needed to hold large variable-size
+// fields.
+static const size_t kBitcodeWrappererBufferSize = 1024;
+
+// Support class for outputting a wrapped bitcode file from a raw bitcode
+// file (and optionally additional header fields), or for outputting a raw
+// bitcode file from a wrapped one.
+class BitcodeWrapperer {
+ public:
+ // Create a bitcode wrapperer using the following
+ // input and output files.
+ BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile);
+
+ // Returns true if the input file begins with a bitcode
+ // wrapper magic number. As a side effect, _wrapper_ fields are set.
+ bool IsInputBitcodeWrapper();
+
+ // Returns true if the input file begins with a bitcode
+ // file magic number.
+ bool IsInputBitcodeFile();
+
+ // Add a variable-length field to the header. The caller is responsible
+ // for freeing the data pointed to by the BCHeaderField.
+ void AddHeaderField(BCHeaderField* field);
+
+ // Generate a wrapped bitcode file from the input bitcode file
+ // and the current header data. Return true on success.
+ bool GenerateWrappedBitcodeFile();
+
+ // Unwrap the wrapped bitcode file, to the corresponding
+ // outfile. Return true on success.
+ bool GenerateRawBitcodeFile();
+
+ // Print current wrapper header fields to stderr for debugging.
+ void PrintWrapperHeader();
+
+ ~BitcodeWrapperer();
+
+ private:
+ DISALLOW_CLASS_COPY_AND_ASSIGN(BitcodeWrapperer);
+
+ // Refills the buffer with more bytes. Does this in a way
+ // such that it is maximally filled.
+ void FillBuffer();
+
+ // Returns the number of bytes in infile.
+ off_t GetInFileSize() {
+ if (infile_ != NULL) {
+ return infile_->Size();
+ } else {
+ return 0;
+ }
+ }
+
+ // Returns the offset of bitcode (i.e. the size of the wrapper header)
+ // if the output file were to be written now.
+ size_t BitcodeOffset();
+
+ // Returns true if we can read a word. If necessary, fills the buffer
+  // with enough bytes so that there is at least a 32-bit value
+ // in the buffer. Returns false if there isn't a 32-bit value
+ // to read from the input file.
+ bool CanReadWord();
+
+ // Read a (32-bit) word from the input. Return true
+ // if able to read the word.
+ bool ReadWord(uint32_t& word);
+
+ // Write a (32-bit) word to the output. Return true if successful
+ bool WriteWord(uint32_t word);
+
+ // Write all variable-sized header fields to the output. Return true
+ // if successful.
+ bool WriteVariableFields();
+
+ // Parse the bitcode wrapper header in the infile, if any. Return true
+ // if successful.
+ bool ParseWrapperHeader();
+
+ // Returns the i-th character in front of the cursor in the buffer.
+ uint8_t BufferLookahead(int i) { return buffer_[cursor_ + i]; }
+
+ // Returns how many unread bytes are in the buffer.
+ size_t GetBufferUnreadBytes() { return buffer_size_ - cursor_; }
+
+
+ // Backs up the read cursor to the beginning of the input buffer.
+ void ResetCursor() {
+ cursor_ = 0;
+ }
+
+ // Generates the header sequence for the wrapped bitcode being
+ // generated.
+ bool WriteBitcodeWrapperHeader();
+
+ // Copies size bytes of infile to outfile, using the buffer.
+ bool BufferCopyInToOut(uint32_t size);
+
+ // Discards the old infile and replaces it with the given file.
+ void ReplaceInFile(WrapperInput* new_infile);
+
+ // Discards the old outfile and replaces it with the given file.
+ void ReplaceOutFile(WrapperOutput* new_outfile);
+
+ // Moves to the given position in the input file. Returns false
+ // if unsuccessful.
+ bool Seek(uint32_t pos);
+
+ // Clear the buffer of all contents.
+ void ClearBuffer();
+
+ // The input file being processed. Can be either
+ // a bitcode file, a wrappered bitcode file, or a secondary
+ // file to be wrapped.
+ WrapperInput* infile_;
+
+ // The output file being generated. Can be either
+ // a bitcode file, a wrappered bitcode file, or a secondary
+ // unwrapped file.
+ WrapperOutput* outfile_;
+
+ // A buffer of bytes read from the input file.
+ std::vector<uint8_t> buffer_;
+
+ // The number of bytes that were read from the input file
+ // into the buffer.
+ size_t buffer_size_;
+
+ // The index to the current read point within the buffer.
+ size_t cursor_;
+
+ // True when eof of input is reached.
+ bool infile_at_eof_;
+
+ // The 32-bit value defining the offset of the raw bitcode in the input file.
+ uint32_t infile_bc_offset_;
+
+ // The 32-bit value defining the generated offset of the wrapped bitcode.
+ // This value changes as new fields are added with AddHeaderField
+ uint32_t wrapper_bc_offset_;
+
+ // The 32-bit value defining the size of the raw wrapped bitcode.
+ uint32_t wrapper_bc_size_;
+
+ // Android header version and target API
+ uint32_t android_header_version_;
+ uint32_t android_target_api_;
+
+ // PNaCl bitcode version
+ uint32_t pnacl_bc_version_;
+
+ // Vector of variable header fields
+ std::vector<BCHeaderField> header_fields_;
+  // If any buffer data from header fields is owned, it is stored here and
+ // freed on destruction.
+ std::vector<uint8_t*> variable_field_data_;
+
+ // True if there was an error condition (e.g. the file is not bitcode)
+ bool error_;
+};
+
+#endif // LLVM_WRAP_BITCODE_WRAPPERER_H__
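
Putting the wrapper pieces together, wrapping or unwrapping a file looks roughly like the sketch below; FileWrapperInput and FileWrapperOutput are the file-backed implementations added later in this commit, and the paths are placeholders:

    #include "llvm/Wrap/bitcode_wrapperer.h"
    #include "llvm/Wrap/file_wrapper_input.h"
    #include "llvm/Wrap/file_wrapper_output.h"

    bool WrapOrUnwrap() {
      FileWrapperInput in("input.bc");      // placeholder path
      FileWrapperOutput out("output.bc");   // placeholder path
      BitcodeWrapperer wrapperer(&in, &out);

      if (wrapperer.IsInputBitcodeWrapper())
        return wrapperer.GenerateRawBitcodeFile();      // already wrapped: unwrap
      if (wrapperer.IsInputBitcodeFile())
        return wrapperer.GenerateWrappedBitcodeFile();  // raw bitcode: wrap it
      return false;  // neither magic number matched
    }
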
diff --git a/include/llvm/Wrap/file_wrapper_input.h b/include/llvm/Wrap/file_wrapper_input.h
new file mode 100644
index 0000000000..9f3de004c4
--- /dev/null
+++ b/include/llvm/Wrap/file_wrapper_input.h
@@ -0,0 +1,48 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+// Defines a utility class for using files as bitcode wrapping input.
+
+#ifndef FILE_WRAPPER_INPUT_H__
+#define FILE_WRAPPER_INPUT_H__
+
+#include "llvm/Support/support_macros.h"
+#include "llvm/Wrap/wrapper_input.h"
+
+#include <stdio.h>
+#include <string>
+
+// Define a class to wrap named files.
+class FileWrapperInput : public WrapperInput {
+ public:
+ FileWrapperInput(const std::string& name);
+ ~FileWrapperInput();
+ // Tries to read the requested number of bytes into the buffer. Returns the
+ // actual number of bytes read.
+ virtual size_t Read(uint8_t* buffer, size_t wanted);
+  // Returns true if at end of file. Note: may return false
+  // until Read has been called and returned 0.
+ virtual bool AtEof();
+ // Returns the size of the file (in bytes).
+ virtual off_t Size();
+ // Moves to the given offset within the file. Returns
+ // false if unable to move to that position.
+ virtual bool Seek(uint32_t pos);
+ private:
+ // The name of the file.
+ std::string _name;
+ // True once eof has been encountered.
+ bool _at_eof;
+ // True if size has been computed.
+ bool _size_found;
+ // The size of the file.
+ off_t _size;
+ // The corresponding (opened) file.
+ FILE* _file;
+ private:
+ DISALLOW_CLASS_COPY_AND_ASSIGN(FileWrapperInput);
+};
+
+#endif // FILE_WRAPPER_INPUT_H__
diff --git a/include/llvm/Wrap/file_wrapper_output.h b/include/llvm/Wrap/file_wrapper_output.h
new file mode 100644
index 0000000000..714bd36a75
--- /dev/null
+++ b/include/llvm/Wrap/file_wrapper_output.h
@@ -0,0 +1,34 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+// Defines a utility class for using files as bitcode wrapping output.
+
+#ifndef FILE_WRAPPER_OUTPUT_H__
+#define FILE_WRAPPER_OUTPUT_H__
+
+#include "llvm/Support/support_macros.h"
+#include "llvm/Wrap/wrapper_output.h"
+#include <stdio.h>
+#include <string>
+
+// Define a class to wrap named files.
+class FileWrapperOutput : public WrapperOutput {
+ public:
+ FileWrapperOutput(const std::string& name);
+ ~FileWrapperOutput();
+ // Writes a single byte, returning false if unable to write.
+ virtual bool Write(uint8_t byte);
+ // Writes the specified number of bytes in the buffer to
+ // output. Returns false if unable to write.
+ virtual bool Write(const uint8_t* buffer, size_t buffer_size);
+ private:
+ // The name of the file
+ std::string _name;
+ // The corresponding (opened) file.
+ FILE* _file;
+ private:
+ DISALLOW_CLASS_COPY_AND_ASSIGN(FileWrapperOutput);
+};
+#endif // FILE_WRAPPER_OUTPUT_H__
diff --git a/include/llvm/Wrap/wrapper_input.h b/include/llvm/Wrap/wrapper_input.h
new file mode 100644
index 0000000000..cde918083a
--- /dev/null
+++ b/include/llvm/Wrap/wrapper_input.h
@@ -0,0 +1,38 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+// Define a generic interface to a file/memory region that contains
+// a bitcode file, a wrapped bitcode file, or a data file to wrap.
+
+#ifndef LLVM_WRAP_WRAPPER_INPUT_H__
+#define LLVM_WRAP_WRAPPER_INPUT_H__
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "llvm/Support/support_macros.h"
+
+// The following is a generic interface to a file/memory region that contains
+// a bitcode file, a wrapped bitcode file, or data file to wrap.
+class WrapperInput {
+ public:
+ WrapperInput() {}
+ virtual ~WrapperInput() {}
+ // Tries to read the requested number of bytes into the buffer. Returns the
+ // actual number of bytes read.
+ virtual size_t Read(uint8_t* buffer, size_t wanted) = 0;
+  // Returns true if at end of input. Note: may return false until
+  // Read has been called and returned 0.
+ virtual bool AtEof() = 0;
+ // Returns the size of the input (in bytes).
+ virtual off_t Size() = 0;
+ // Moves to the given offset within the input region. Returns false
+ // if unable to move to that position.
+ virtual bool Seek(uint32_t pos) = 0;
+ private:
+ DISALLOW_CLASS_COPY_AND_ASSIGN(WrapperInput);
+};
+
+#endif // LLVM_WRAP_WRAPPER_INPUT_H__
diff --git a/include/llvm/Wrap/wrapper_output.h b/include/llvm/Wrap/wrapper_output.h
new file mode 100644
index 0000000000..7045705991
--- /dev/null
+++ b/include/llvm/Wrap/wrapper_output.h
@@ -0,0 +1,34 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+// Defines a generic interface to a file/memory region that
+// contains a generated wrapped bitcode file, bitcode file,
+// or data file.
+
+#ifndef LLVM_WRAP_WRAPPER_OUTPUT_H__
+#define LLVM_WRAP_WRAPPER_OUTPUT_H__
+
+#include <stdint.h>
+#include <stddef.h>
+
+#include "llvm/Support/support_macros.h"
+
+// The following is a generic interface to a file/memory region
+// that contains a generated bitcode file, wrapped bitcode file,
+// or a data file.
+class WrapperOutput {
+ public:
+ WrapperOutput() {}
+ virtual ~WrapperOutput() {}
+ // Writes a single byte, returning false if unable to write.
+ virtual bool Write(uint8_t byte) = 0;
+ // Writes the specified number of bytes in the buffer to
+ // output. Returns false if unable to write.
+ virtual bool Write(const uint8_t* buffer, size_t buffer_size);
+ private:
+ DISALLOW_CLASS_COPY_AND_ASSIGN(WrapperOutput);
+};
+
+#endif // LLVM_WRAP_WRAPPER_OUTPUT_H__
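
The buffer overload of Write above is not pure virtual; the 9-line lib/Wrap/wrapper_output.cpp in the diffstat most likely supplies a default that forwards to the single-byte Write, along these lines (an assumption, since that file's body is not shown here):

    #include "llvm/Wrap/wrapper_output.h"

    // Plausible default: write the buffer one byte at a time via the
    // abstract single-byte Write, stopping on the first failure.
    bool WrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) {
      for (size_t i = 0; i < buffer_size; ++i) {
        if (!Write(buffer[i])) return false;
      }
      return true;
    }
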
diff --git a/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
new file mode 100644
index 0000000000..661bb47550
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/NaClJITMemoryManager.cpp
@@ -0,0 +1,429 @@
+//===-- NaClJITMemoryManager.cpp - Memory Allocator for JIT'd code --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the NaClJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/NaClJITMemoryManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+#if defined(__linux__) || defined(__native_client__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+using namespace llvm;
+
+#ifdef __native_client__
+// etext is guarded by ifdef so the code still compiles on non-ELF platforms
+extern char etext;
+#endif
+
+// The way NaCl linking is currently set up, there is a gap between the text
+// segment and the rodata segment where we can fill dyncode. The text ends
+// at etext, but there's no symbol for the start of rodata. Currently the
+// linker script puts it at 0x11000000.
+// If we run out of space there, we can also allocate below the text segment
+// and keep going downward until we run into code loaded by the dynamic
+// linker. (TODO(dschuff): make that work)
+// For now, just start at etext and go until we hit rodata.
+
+// It's an open issue that lazy jitting is not thread safe (PR5184). However
+// NaCl's dyncode_create solves exactly this problem, so in the future
+// this allocator could (should?) be made thread safe
+
+const size_t NaClJITMemoryManager::kStubSlabSize;
+const size_t NaClJITMemoryManager::kDataSlabSize;
+const size_t NaClJITMemoryManager::kCodeSlabSize;
+
+// TODO(dschuff): fix allocation start (etext + 64M is hopefully after where
+// glibc is loaded) and limit (maybe need a linker-provided symbol for the
+// start of the IRT or end of the segment gap)
+// (also fix allocateCodeSlab and maybe allocateStubSlab at that time)
+// what we really need is a usable nacl_dyncode_alloc(), but this could still
+// be improved upon using dl_iterate_phdr
+const static intptr_t kNaClSegmentGapEnd = 0x11000000;
+
+NaClJITMemoryManager::NaClJITMemoryManager() :
+ AllocatableRegionLimit((uint8_t *)kNaClSegmentGapEnd),
+ NextCode(AllocatableRegionStart), GOTBase(NULL) {
+#ifdef __native_client__
+ AllocatableRegionStart = (uint8_t *)&etext + 1024*1024*64;
+#else
+ assert(false && "NaClJITMemoryManager will not work outside NaCl sandbox");
+#endif
+ AllocatableRegionStart =
+ (uint8_t *)RoundUpToAlignment((uint64_t)AllocatableRegionStart,
+ kBundleSize);
+ NextCode = AllocatableRegionStart;
+
+ // Allocate 1 stub slab to get us started
+ CurrentStubSlab = allocateStubSlab(0);
+ InitFreeList(&CodeFreeListHead);
+ InitFreeList(&DataFreeListHead);
+
+ DEBUG(dbgs() << "NaClJITMemoryManager: AllocatableRegionStart " <<
+ AllocatableRegionStart << " Limit " << AllocatableRegionLimit << "\n");
+}
+
+NaClJITMemoryManager::~NaClJITMemoryManager() {
+ delete [] GOTBase;
+ DestroyFreeList(CodeFreeListHead);
+ DestroyFreeList(DataFreeListHead);
+}
+
+FreeListNode *NaClJITMemoryManager::allocateCodeSlab(size_t MinSize) {
+ FreeListNode *node = new FreeListNode();
+ if (AllocatableRegionLimit - NextCode < (int)kCodeSlabSize) {
+ // TODO(dschuff): might be possible to try the space below text segment?
+ report_fatal_error("Ran out of code space");
+ }
+ node->address = NextCode;
+ node->size = std::max(kCodeSlabSize, MinSize);
+ NextCode += node->size;
+ DEBUG(dbgs() << "allocated code slab " << NextCode - node->size << "-" <<
+ NextCode << "\n");
+ return node;
+}
+
+SimpleSlab NaClJITMemoryManager::allocateStubSlab(size_t MinSize) {
+ SimpleSlab s;
+ DEBUG(dbgs() << "allocateStubSlab: ");
+ // It's a little weird to just allocate and throw away the FreeListNode, but
+ // since code region allocation is still a bit ugly and magical, I decided
+ // it's better to reuse allocateCodeSlab than duplicate the logic.
+ FreeListNode *n = allocateCodeSlab(MinSize);
+ s.address = n->address;
+ s.size = n->size;
+ s.next_free = n->address;
+ delete n;
+ return s;
+}
+
+FreeListNode *NaClJITMemoryManager::allocateDataSlab(size_t MinSize) {
+ FreeListNode *node = new FreeListNode;
+ size_t size = std::max(kDataSlabSize, MinSize);
+ node->address = (uint8_t*)DataAllocator.Allocate(size, kBundleSize);
+ node->size = size;
+ return node;
+}
+
+void NaClJITMemoryManager::InitFreeList(FreeListNode **Head) {
+ // Make sure there is always at least one entry in the free list
+ *Head = new FreeListNode;
+ (*Head)->Next = (*Head)->Prev = *Head;
+ (*Head)->size = 0;
+}
+
+void NaClJITMemoryManager::DestroyFreeList(FreeListNode *Head) {
+ FreeListNode *n = Head->Next;
+ while(n != Head) {
+ FreeListNode *next = n->Next;
+ delete n;
+ n = next;
+ }
+ delete Head;
+}
+
+FreeListNode *NaClJITMemoryManager::FreeListAllocate(uintptr_t &ActualSize,
+ FreeListNode *Head,
+ FreeListNode * (NaClJITMemoryManager::*allocate)(size_t)) {
+ FreeListNode *candidateBlock = Head;
+ FreeListNode *iter = Head->Next;
+
+ uintptr_t largest = candidateBlock->size;
+ // Search for the largest free block
+ while (iter != Head) {
+ if (iter->size > largest) {
+ largest = iter->size;
+ candidateBlock = iter;
+ }
+ iter = iter->Next;
+ }
+
+ if (largest < ActualSize || largest == 0) {
+ candidateBlock = (this->*allocate)(ActualSize);
+ } else {
+ candidateBlock->RemoveFromFreeList();
+ }
+ return candidateBlock;
+}
+
+void NaClJITMemoryManager::FreeListFinishAllocation(FreeListNode *Block,
+ FreeListNode *Head, uint8_t *AllocationStart, uint8_t *AllocationEnd,
+ AllocationTable &Table) {
+ assert(AllocationEnd > AllocationStart);
+ assert(Block->address == AllocationStart);
+ uint8_t *End = (uint8_t *)RoundUpToAlignment((uint64_t)AllocationEnd,
+ kBundleSize);
+ assert(End <= Block->address + Block->size);
+ int AllocationSize = End - Block->address;
+ Table[AllocationStart] = AllocationSize;
+
+ Block->size -= AllocationSize;
+  if (Block->size >= kBundleSize * 2) { // TODO(dschuff): better heuristic?
+ Block->address = End;
+ Block->AddToFreeList(Head);
+ } else {
+ delete Block;
+ }
+ DEBUG(dbgs()<<"FinishAllocation size "<< AllocationSize <<" end "<<End<<"\n");
+}
+
+void NaClJITMemoryManager::FreeListDeallocate(FreeListNode *Head,
+ AllocationTable &Table,
+ void *Body) {
+ uint8_t *Allocation = (uint8_t *)Body;
+ DEBUG(dbgs() << "deallocating "<<Body<<" ");
+ assert(Table.count(Allocation) && "FreeList Deallocation not found in table");
+ FreeListNode *Block = new FreeListNode;
+ Block->address = Allocation;
+ Block->size = Table[Allocation];
+ Block->AddToFreeList(Head);
+ DEBUG(dbgs() << "deallocated "<< Allocation<< " size " << Block->size <<"\n");
+}
+
+uint8_t *NaClJITMemoryManager::startFunctionBody(const Function *F,
+ uintptr_t &ActualSize) {
+ CurrentCodeBlock = FreeListAllocate(ActualSize, CodeFreeListHead,
+ &NaClJITMemoryManager::allocateCodeSlab);
+ DEBUG(dbgs() << "startFunctionBody CurrentBlock " << CurrentCodeBlock <<
+ " addr " << CurrentCodeBlock->address << "\n");
+ ActualSize = CurrentCodeBlock->size;
+ return CurrentCodeBlock->address;
+}
+
+void NaClJITMemoryManager::endFunctionBody(const Function *F,
+ uint8_t *FunctionStart,
+ uint8_t *FunctionEnd) {
+ DEBUG(dbgs() << "endFunctionBody ");
+ FreeListFinishAllocation(CurrentCodeBlock, CodeFreeListHead,
+ FunctionStart, FunctionEnd, AllocatedFunctions);
+
+}
+
+uint8_t *NaClJITMemoryManager::allocateCodeSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ llvm_unreachable("Implement me! (or don't.)");
+}
+
+uint8_t *NaClJITMemoryManager::allocateDataSection(uintptr_t Size,
+ unsigned Alignment,
+ unsigned SectionID) {
+ return (uint8_t *)DataAllocator.Allocate(Size, Alignment);
+}
+
+void NaClJITMemoryManager::deallocateFunctionBody(void *Body) {
+ DEBUG(dbgs() << "deallocateFunctionBody, ");
+ if (Body) FreeListDeallocate(CodeFreeListHead, AllocatedFunctions, Body);
+}
+
+uint8_t *NaClJITMemoryManager::allocateStub(const GlobalValue* F,
+ unsigned StubSize,
+ unsigned Alignment) {
+ uint8_t *StartAddress = (uint8_t *)(uintptr_t)
+ RoundUpToAlignment((uintptr_t)CurrentStubSlab.next_free, Alignment);
+ if (StartAddress + StubSize >
+ CurrentStubSlab.address + CurrentStubSlab.size) {
+ CurrentStubSlab = allocateStubSlab(kStubSlabSize);
+ StartAddress = (uint8_t *)(uintptr_t)
+ RoundUpToAlignment((uintptr_t)CurrentStubSlab.next_free, Alignment);
+ }
+ CurrentStubSlab.next_free = StartAddress + StubSize;
+ DEBUG(dbgs() <<"allocated stub "<<StartAddress<< " size "<<StubSize<<"\n");
+ return StartAddress;
+}
+
+uint8_t *NaClJITMemoryManager::allocateSpace(intptr_t Size,
+ unsigned Alignment) {
+ uint8_t *r = (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ DEBUG(dbgs() << "allocateSpace " << Size <<"/"<<Alignment<<" ret "<<r<<"\n");
+ return r;
+}
+
+uint8_t *NaClJITMemoryManager::allocateGlobal(uintptr_t Size,
+ unsigned Alignment) {
+ uint8_t *r = (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+ DEBUG(dbgs() << "allocateGlobal " << Size <<"/"<<Alignment<<" ret "<<r<<"\n");
+ return r;
+}
+
+uint8_t* NaClJITMemoryManager::startExceptionTable(const Function* F,
+ uintptr_t &ActualSize) {
+ CurrentDataBlock = FreeListAllocate(ActualSize, DataFreeListHead,
+ &NaClJITMemoryManager::allocateDataSlab);
+ DEBUG(dbgs() << "startExceptionTable CurrentBlock " << CurrentDataBlock <<
+ " addr " << CurrentDataBlock->address << "\n");
+ ActualSize = CurrentDataBlock->size;
+ return CurrentDataBlock->address;
+}
+
+void NaClJITMemoryManager::endExceptionTable(const Function *F,
+ uint8_t *TableStart,
+ uint8_t *TableEnd, uint8_t* FrameRegister) {
+ DEBUG(dbgs() << "endExceptionTable ");
+ FreeListFinishAllocation(CurrentDataBlock, DataFreeListHead,
+ TableStart, TableEnd, AllocatedTables);
+}
+
+void NaClJITMemoryManager::deallocateExceptionTable(void *ET) {
+ DEBUG(dbgs() << "deallocateExceptionTable, ");
+ if (ET) FreeListDeallocate(DataFreeListHead, AllocatedTables, ET);
+}
+
+// Copy of DefaultJITMemoryManager's implementation
+void NaClJITMemoryManager::AllocateGOT() {
+ assert(GOTBase == 0 && "Cannot allocate the got multiple times");
+ GOTBase = new uint8_t[sizeof(void*) * 8192];
+ HasGOT = true;
+}
+
+//===----------------------------------------------------------------------===//
+// getPointerToNamedFunction() implementation.
+// This code is pasted directly from r153607 of JITMemoryManager.cpp and has
+// never been tested. It most likely doesn't work inside the sandbox.
+//===----------------------------------------------------------------------===//
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+ while (!AtExitHandlers.empty()) {
+ void (*Fn)() = AtExitHandlers.back();
+ AtExitHandlers.pop_back();
+ Fn();
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+ StatSymbols() {
+ sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+ sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+ sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+ sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+ sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+ sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+ sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+ sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+ sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+ sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+ }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface. As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *NaClJITMemoryManager::getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure) {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to
+ // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+ // (and register wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+  // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return 0;
+}
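
The code and data free lists used throughout this file are circular doubly linked lists with a zero-size sentinel head, which is why InitFreeList points the head at itself and FreeListAllocate can start its largest-block scan from the head. A standalone sketch of that discipline, using only the FreeListNode declaration from the header (the block size and address are arbitrary):

    #include "llvm/ExecutionEngine/NaClJITMemoryManager.h"

    using namespace llvm;

    void FreeListSketch() {
      // Sentinel head: an empty circular list points at itself.
      FreeListNode *Head = new FreeListNode;
      Head->Next = Head->Prev = Head;
      Head->size = 0;

      // AddToFreeList splices the node in just before the head.
      FreeListNode *Block = new FreeListNode;
      Block->address = 0;        // placeholder address
      Block->size = 64 * 1024;   // arbitrary size
      Block->AddToFreeList(Head);

      // RemoveFromFreeList unlinks it; the list is empty again when
      // Head->Next == Head.
      Block->RemoveFromFreeList();
      delete Block;
      delete Head;
    }
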
diff --git a/lib/Target/ARM/ARMInstrNaCl.td b/lib/Target/ARM/ARMInstrNaCl.td
new file mode 100644
index 0000000000..c884cd0fe4
--- /dev/null
+++ b/lib/Target/ARM/ARMInstrNaCl.td
@@ -0,0 +1,145 @@
+//====- ARMInstrNaCl.td - Describe NaCl Instructions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the modifications to the ARM instruction set needed for
+// Native Client code generation.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+//
+// Native Client Pseudo-Instructions
+//
+// These instructions implement the Native Client pseudo-instructions, such
+// as nacljmp and naclasp.
+//
+// TableGen and MC consider these to be "real" instructions. They can be
+// parsed by the AsmParser and emitted by the AsmStreamer as if they
+// were just regular instructions. They are not marked "Pseudo" because
+// this would imply isCodeGenOnly=1, which would stop them from being
+// parsed by the assembler.
+//
+// These instructions cannot be encoded (written into an object file) by the
+// MCCodeEmitter. Instead, during direct object emission, they get lowered to
+// a sequence of streamer emits. (see ARMInstrNaCl.cpp)
+//
+// These instructions should not be used in CodeGen. They have no pattern
+// and lack CodeGen metadata. Instead, the ARMNaClRewritePass should
+// generate these instructions after CodeGen is finished.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM Native Client "Pseudo" Instructions
+//===----------------------------------------------------------------------===//
+
+// It does not seem possible to define a single base class for both the
+// synthetic isCodeGenOnly=1 instructions as well as the isAsmParserOnly=1
+// versions.
+
+// These are the fixed flags:
+// AddrMode am = AddrModeNone
+// SizeFlagVal sz = SizeSpecial
+// IndexMode im = IndexModeNone
+// Domain d = GenericDomain
+// InstrItinClass = NoItinerary
+
+// The non-fixed flags need their own class
+// InOperandList = !con(iops, (ins pred:$p)) or left alone
+// AsmString = !strconcat(opc, "${p}", asm) or left alone
+// Format f = MiscFrm/Pseudo
+// isPredicable = 0/1
+
+/// However, it is possible to make a set of two base classes for the isAsmParserOnly=1
+/// synthetic instructions.
+
+
+/***** FIXME: ADD in isAsmParserOnly naclguard instructions ***************************
+/// required args:
+// dag outs, dag ins, string opc, string asm, string cstr, Format f, list<dag> pat
+
+class NaClSI<dag outs, dag ins, string opc, string asm, string cstr>
+ : I<outs, ins, AddrModeNone, SizeSpecial, IndexModeNone, MiscFrm,
+ NoItinerary, opc, asm, cstr, pat>, Requires<[IsNaCl]>;
+
+class NaClSINoP<dag outs, dag ins, string opc, string asm, string cstr>
+ : InoP <outs, ins, AddrModeNone, SizeSpecial, IndexModeNone, MiscFrm,
+ NoItinerary, opc, asm, cstr, pat>, Requires<[IsNaCl]>;
+
+class NaClSI<dag outs, dag ins, string opc, string asm, string cstr, Format f, list<dag> pat>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, f,
+ GenericDomain, cstr, NoItinerary>, Requires<[IsNaCl]> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let AsmString = !strconcat(opc, asm);
+};
+
+
+/// For not pseudo instructionable
+class NaClSINoP<dag outs, dag ins, string opc, string asm, string cstr, Format f, list<dag> pat>
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, f,
+ GenericDomain, cstr, NoItinerary>, Requires<[IsNaCl]> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let Pattern = pattern;
+ let AsmString = !strconcat(opc, asm);
+};
+
+/// This is the guarded isCodeGenOnly pseudo instruction for BX_RET
+let isReturn = 1, isTerminator = 1, isBarrier = 1, isCodeGenOnly = 1,
+    // Make sure this is selected in lieu of the plain BX_RET pattern
+ AddedComplexity = 1
+ in {
+ // ARMV4T and above
+ def NACL_CG_BX_RET :
+ ARMPseudoInst<(outs), (ins), BrMiscFrm, IIC_Br,
+ "naclbx", "\tlr", [(ARMretflag)]>,
+ Requires<[HasV4T, IsNaCl]> {
+ }
+}
+
+
+// These are assembler only instructions
+let isAsmParserOnly = 1 in {
+ def NACL_GUARD_LOADSTORE :
+ NaClSI<(outs GPR:$dst), (ins GPR:$a),
+ "naclguard", "\t${dst}, ${a}", "" []>;
+
+ let Defs = [CPSR] in
+ def NACL_GUARD_LOADSTORE_TST :
+ NaClSINoP<
+ PseudoInst<(outs GPR:$dst), (ins GPR:$a), NoItinerary, []> ;
+
+
+ let Defs = [CPSR] in
+ def NACL_GUARD_LOADSTORE_TST :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$a), NoItinerary, []>;
+
+ def NACL_GUARD_INDIRECT_CALL :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>;
+
+ def NACL_GUARD_INDIRECT_JMP :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>;
+
+ def NACL_GUARD_CALL :
+ PseudoInst<(outs), (ins pred:$p), NoItinerary, []>;
+
+ // NOTE: the BX_RET instruction hardcodes lr as well
+ def NACL_GUARD_RETURN :
+ PseudoInst<(outs), (ins pred:$p), NoItinerary, []>;
+
+ // Note: intention is that $src and $dst are the same register.
+ def NACL_DATA_MASK :
+ PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>;
+}
+
+
+**************************************************************************/
diff --git a/lib/Target/ARM/ARMNaClHeaders.cpp b/lib/Target/ARM/ARMNaClHeaders.cpp
new file mode 100644
index 0000000000..781702158a
--- /dev/null
+++ b/lib/Target/ARM/ARMNaClHeaders.cpp
@@ -0,0 +1,192 @@
+//===-- ARMNaClHeaders.cpp - Print SFI headers to an ARM .s file -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the initial header string needed
+// for the Native Client target in ARM assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "ARMNaClRewritePass.h"
+#include <string>
+
+using namespace llvm;
+
+void EmitSFIHeaders(raw_ostream &O) {
+ O << " @ ========================================\n";
+ O << "@ Branch: " << FlagSfiBranch << "\n";
+ O << "@ Stack: " << FlagSfiStack << "\n";
+ O << "@ Store: " << FlagSfiStore << "\n";
+ O << "@ Data: " << FlagSfiData << "\n";
+
+ O << " @ ========================================\n";
+  // NOTE: this macro does bundle alignment as follows:
+  // depending on the current bundle position, it emits p0, p1, p2 or p3
+  // data items of value "val"
+  // NOTE: pos will be one of: 0, 4, 8, 12
+ //
+ O <<
+ "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n"
+ "\t.set pos, (. - XmagicX) % 16\n"
+ "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_illegal_if_at_bundle_begining\n"
+ "\tsfi_long_based_on_pos 1 0 0 0 0xe1277777\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nop_if_at_bundle_end\n"
+ "\tsfi_long_based_on_pos 0 0 0 1 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot3\n"
+ "\tsfi_long_based_on_pos 3 2 1 0 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot2\n"
+ "\tsfi_long_based_on_pos 2 1 0 3 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot1\n"
+ "\tsfi_long_based_on_pos 1 0 3 2 0xe320f000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O << " @ ========================================\n";
+ if (FlagSfiZeroMask) {
+    // This mode sets all masks to zero, which turns them into nops.
+    // This is useful for linking this code against non-sandboxed code
+    // for debugging purposes.
+ O <<
+ "\t.macro sfi_data_mask reg cond\n"
+ "\tbic\\cond \\reg, \\reg, #0\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_data_tst reg\n"
+ "\ttst \\reg, #0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_code_mask reg cond=\n"
+ "\tbic\\cond \\reg, \\reg, #0\n"
+ "\t.endm\n"
+ "\n\n";
+
+ } else {
+ O <<
+ "\t.macro sfi_data_mask reg cond\n"
+ "\tbic\\cond \\reg, \\reg, #0xc0000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_data_tst reg\n"
+ "\ttst \\reg, #0xc0000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_code_mask reg cond=\n"
+ "\tbic\\cond \\reg, \\reg, #0xc000000f\n"
+ "\t.endm\n"
+ "\n\n";
+ }
+
+ O << " @ ========================================\n";
+ if (FlagSfiBranch) {
+ O <<
+ "\t.macro sfi_call_preamble cond=\n"
+ "\tsfi_nops_to_force_slot3\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_return_preamble reg cond=\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\reg \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ // This is used just before "bx rx"
+ O <<
+ "\t.macro sfi_indirect_jump_preamble link cond=\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\link \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+  // This is used just before "blx rx"
+ O <<
+ "\t.macro sfi_indirect_call_preamble link cond=\n"
+ "\tsfi_nops_to_force_slot2\n"
+ "\tsfi_code_mask \\link \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ }
+
+ if (FlagSfiStore) {
+ O << " @ ========================================\n";
+
+ O <<
+ "\t.macro sfi_load_store_preamble reg cond\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_data_mask \\reg, \\cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_cstore_preamble reg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_data_tst \\reg\n"
+ "\t.endm\n"
+ "\n\n";
+ } else {
+ O <<
+ "\t.macro sfi_load_store_preamble reg cond\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_cstore_preamble reg cond\n"
+ "\t.endm\n"
+ "\n\n";
+ }
+
+ const char* kPreds[] = {
+ "eq",
+ "ne",
+ "lt",
+ "le",
+ "ls",
+ "ge",
+ "gt",
+ "hs",
+ "hi",
+ "lo",
+ "mi",
+ "pl",
+ NULL,
+ };
+
+ O << " @ ========================================\n";
+ O << "\t.text\n";
+}
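
The sfi_long_based_on_pos macro is the heart of these helpers: the current byte offset within the 16-byte bundle (always 0, 4, 8 or 12) selects one of the nibbles p0..p3, which is the number of padding words to emit. A small C++ model of that expression, purely to illustrate the arithmetic the assembler performs:

    #include <cassert>
    #include <cstdint>

    // Models: .fill (((p3<<12)|(p2<<8)|(p1<<4)|p0) >> pos) & 15, 4, val
    unsigned PaddingWords(unsigned p0, unsigned p1, unsigned p2, unsigned p3,
                          unsigned pos) {
      assert(pos % 4 == 0 && pos < 16 && "pos is a word slot within the bundle");
      uint32_t packed = (p3 << 12) | (p2 << 8) | (p1 << 4) | p0;
      return (packed >> pos) & 15;
    }

    // sfi_nops_to_force_slot3 passes p0..p3 = 3 2 1 0, so code already in the
    // last slot (pos == 12) gets 0 padding nops, while code in slot 0 gets 3.
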
diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp
new file mode 100644
index 0000000000..91087aaaa2
--- /dev/null
+++ b/lib/Target/ARM/ARMNaClRewritePass.cpp
@@ -0,0 +1,755 @@
+//===-- ARMNaClRewritePass.cpp - Native Client Rewrite Pass ------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Native Client Rewrite Pass
+// This final pass inserts the sandboxing instructions needed to run inside
+// the Native Client sandbox. Native Client requires certain software fault
+// isolation (SFI) constructions to be put in place, to prevent escape from
+// the sandbox. Native Client refuses to execute binaries without the correct
+// SFI sequences.
+//
+// Potentially dangerous operations which are protected include:
+// * Stores
+// * Branches
+// * Changes to SP
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-sfi"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMNaClRewritePass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+#include <set>
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace llvm {
+
+cl::opt<bool>
+FlagSfiData("sfi-data", cl::desc("use illegal at data bundle beginning"));
+
+cl::opt<bool>
+FlagSfiLoad("sfi-load", cl::desc("enable sandboxing for load"));
+
+cl::opt<bool>
+FlagSfiStore("sfi-store", cl::desc("enable sandboxing for stores"));
+
+cl::opt<bool>
+FlagSfiStack("sfi-stack", cl::desc("enable sandboxing for stack changes"));
+
+cl::opt<bool>
+FlagSfiBranch("sfi-branch", cl::desc("enable sandboxing for branches"));
+
+}
+
+namespace {
+ class ARMNaClRewritePass : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMNaClRewritePass() : MachineFunctionPass(ID) {}
+
+ const ARMBaseInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM Native Client Rewrite Pass";
+ }
+
+ private:
+
+ bool SandboxMemoryReferencesInBlock(MachineBasicBlock &MBB);
+ void SandboxMemory(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx,
+ bool CPSRLive,
+ bool IsLoad);
+ bool TryPredicating(MachineInstr &MI, ARMCC::CondCodes);
+
+ bool SandboxBranchesInBlock(MachineBasicBlock &MBB);
+ bool SandboxStackChangesInBlock(MachineBasicBlock &MBB);
+
+ void SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ void LightweightVerify(MachineFunction &MF);
+ };
+ char ARMNaClRewritePass::ID = 0;
+}
+
+static bool IsReturn(const MachineInstr &MI) {
+ return (MI.getOpcode() == ARM::BX_RET);
+}
+
+static bool IsIndirectJump(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case ARM::BX:
+ case ARM::TAILJMPr:
+ return true;
+ }
+}
+
+static bool IsIndirectCall(const MachineInstr &MI) {
+ return MI.getOpcode() == ARM::BLX;
+}
+
+static bool IsDirectCall(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case ARM::BL:
+ case ARM::BL_pred:
+ case ARM::TPsoft:
+ return true;
+ }
+}
+
+static bool IsCPSRLiveOut(const MachineBasicBlock &MBB) {
+ // CPSR is live-out if any successor lists it as live-in.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end();
+ SI != E;
+ ++SI) {
+ const MachineBasicBlock *Succ = *SI;
+ if (Succ->isLiveIn(ARM::CPSR)) return true;
+ }
+ return false;
+}
+
+static void DumpInstructionVerbose(const MachineInstr &MI) {
+ dbgs() << MI;
+ dbgs() << MI.getNumOperands() << " operands:" << "\n";
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand& op = MI.getOperand(i);
+ dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n";
+ }
+ dbgs() << "\n";
+}
+
+static void DumpBasicBlockVerbose(const MachineBasicBlock &MBB) {
+ dbgs() << "\n<<<<< DUMP BASIC BLOCK START\n";
+ for (MachineBasicBlock::const_iterator MBBI = MBB.begin(), MBBE = MBB.end();
+ MBBI != MBBE;
+ ++MBBI) {
+ DumpInstructionVerbose(*MBBI);
+ }
+ dbgs() << "<<<<< DUMP BASIC BLOCK END\n\n";
+}
+
+static void DumpBasicBlockVerboseCond(const MachineBasicBlock &MBB, bool b) {
+ if (b) {
+ DumpBasicBlockVerbose(MBB);
+ }
+}
+
+/**********************************************************************/
+/* Exported functions */
+
+namespace ARM_SFI {
+
+bool IsStackChange(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
+ return MI.modifiesRegister(ARM::SP, TRI);
+}
+
+bool NextInstrMasksSP(const MachineInstr &MI) {
+ MachineBasicBlock::const_iterator It = &MI;
+ const MachineBasicBlock *MBB = MI.getParent();
+
+ MachineBasicBlock::const_iterator next = ++It;
+ if (next == MBB->end()) {
+ return false;
+ }
+
+ const MachineInstr &next_instr = *next;
+ unsigned opcode = next_instr.getOpcode();
+ return (opcode == ARM::SFI_DATA_MASK) &&
+ (next_instr.getOperand(0).getReg() == ARM::SP);
+}
+
+bool IsSandboxedStackChange(const MachineInstr &MI) {
+ // Calls do not change the stack on ARM but they have implicit-defs, so
+ // make sure they do not get sandboxed.
+ if (MI.getDesc().isCall())
+ return true;
+
+ unsigned opcode = MI.getOpcode();
+ switch (opcode) {
+ default: break;
+
+ // These just bump SP by a little (and access the stack),
+ // so that is okay due to guard pages.
+ case ARM::STMIA_UPD:
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ return true;
+
+ // Similar, unless it is a load into SP...
+ case ARM::LDMIA_UPD:
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD: {
+ bool dest_SP = false;
+ // Dest regs start at operand index 4.
+ for (unsigned i = 4; i < MI.getNumOperands(); ++i) {
+ const MachineOperand &DestReg = MI.getOperand(i);
+ dest_SP = dest_SP || (DestReg.getReg() == ARM::SP);
+ }
+ if (dest_SP) {
+ break;
+ }
+ return true;
+ }
+
+ // Some localmods *should* prevent selecting a reg offset
+ // (see SelectAddrMode2 in ARMISelDAGToDAG.cpp).
+ // Otherwise, the store is already a potential violation.
+ case ARM::STR_PRE_REG:
+ case ARM::STR_PRE_IMM:
+
+ case ARM::STRH_PRE:
+
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_PRE_IMM:
+ return true;
+
+ // Similar, unless it is a load into SP...
+ case ARM::LDRi12:
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDRH_PRE:
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSB_PRE: {
+ const MachineOperand &DestReg = MI.getOperand(0);
+ if (DestReg.getReg() == ARM::SP) {
+ break;
+ }
+ return true;
+ }
+
+ // Here, if SP is the base / write-back reg, we need to check if
+ // a reg is used as offset (otherwise it is not a small nudge).
+ case ARM::STR_POST_REG:
+ case ARM::STR_POST_IMM:
+ case ARM::STRH_POST:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_POST_IMM: {
+ const MachineOperand &WBReg = MI.getOperand(0);
+ const MachineOperand &OffReg = MI.getOperand(3);
+ if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) {
+ break;
+ }
+ return true;
+ }
+
+ // Similar, but also check that DestReg is not SP.
+ case ARM::LDR_POST_REG:
+ case ARM::LDR_POST_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_POST:
+ case ARM::LDRSB_POST: {
+ const MachineOperand &DestReg = MI.getOperand(0);
+ if (DestReg.getReg() == ARM::SP) {
+ break;
+ }
+ const MachineOperand &WBReg = MI.getOperand(1);
+ const MachineOperand &OffReg = MI.getOperand(3);
+ if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) {
+ break;
+ }
+ return true;
+ }
+ }
+
+ return (NextInstrMasksSP(MI));
+}
+
+bool NeedSandboxStackChange(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ return (IsStackChange(MI, TRI) && !IsSandboxedStackChange(MI));
+}
+
+} // namespace ARM_SFI
+
+/**********************************************************************/
+
+void ARMNaClRewritePass::getAnalysisUsage(AnalysisUsage &AU) const {
+ // Slight (possibly unnecessary) efficiency tweak:
+ // Promise not to modify the CFG.
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/*
+ * A primitive validator to catch problems at compile time.
+ * E.g., it could be used along with bugpoint to reduce a bitcode file.
+ */
+void ARMNaClRewritePass::LightweightVerify(MachineFunction &MF) {
+
+ for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end();
+ MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
+ MBBI != MBBE;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) {
+ dbgs() << "LightWeightVerify for function: "
+ << MF.getFunction()->getName() << " (BAD STACK CHANGE)\n";
+ DumpInstructionVerbose(MI);
+ DumpBasicBlockVerbose(MBB);
+ // assert(false && "LightweightVerify Failed");
+ }
+ }
+ }
+}
+
+void ARMNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ // (1) Ensure there is room in the bundle for a data mask instruction
+ // (nop'ing to the next bundle if needed).
+ // (2) Do a data mask on SP after the instruction that updated SP.
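+  //
+  // Illustrative expansion (hypothetical instruction): if the rewritten
+  // instruction is "add sp, sp, r1", the block becomes
+  //   SFI_NOP_IF_AT_BUNDLE_END
+  //   add sp, sp, r1
+  //   SFI_DATA_MASK SP, SP, <pred>, CPSR
+  // so that SP is re-masked (bic sp, sp, #0xc0000000) in the same bundle
+  // as the update.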
+ MachineInstr &MI = *MBBI;
+
+ // Use same predicate as current instruction.
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_NOP_IF_AT_BUNDLE_END));
+
+ // Get to next instr (one + to get the original, and one more + to get past)
+ MachineBasicBlock::iterator MBBINext = (MBBI++);
+ MachineBasicBlock::iterator MBBINext2 = (MBBI++);
+
+ BuildMI(MBB, MBBINext2, MI.getDebugLoc(),
+ TII->get(ARM::SFI_DATA_MASK))
+ .addReg(ARM::SP) // modify SP (as dst)
+ .addReg(ARM::SP) // start with SP (as src)
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+
+ return;
+}
+
+bool ARMNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) {
+ SandboxStackChange(MBB, MBBI);
+ Modified |= true;
+ }
+ }
+ return Modified;
+}
+
+bool ARMNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ if (IsReturn(MI)) {
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_RETURN))
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+
+ if (IsIndirectJump(MI)) {
+ MachineOperand &Addr = MI.getOperand(0);
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_INDIRECT_JMP))
+ .addOperand(Addr) // rD
+ .addReg(0) // apparently unused source register?
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+
+ if (IsDirectCall(MI)) {
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_CALL))
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+
+ if (IsIndirectCall(MI)) {
+ MachineOperand &Addr = MI.getOperand(0);
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_INDIRECT_CALL))
+ .addOperand(Addr) // rD
+ .addReg(0) // apparently unused source register?
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+bool ARMNaClRewritePass::TryPredicating(MachineInstr &MI, ARMCC::CondCodes Pred) {
+ // Can't predicate if it's already predicated.
+ // TODO(cbiffle): actually we can, if the conditions match.
+ if (TII->isPredicated(&MI)) return false;
+
+ /*
+ * ARM predicate operands use two actual MachineOperands: an immediate
+ * holding the predicate condition, and a register referencing the flags.
+ */
+ SmallVector<MachineOperand, 2> PredOperands;
+ PredOperands.push_back(MachineOperand::CreateImm((int64_t) Pred));
+ PredOperands.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
+
+ // This attempts to rewrite, but some instructions can't be predicated.
+ return TII->PredicateInstruction(&MI, PredOperands);
+}
+
+static bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 0...
+ case ARM::LDMIA:
+ case ARM::LDMDA:
+ case ARM::LDMDB:
+ case ARM::LDMIB:
+
+ case ARM::VLDMDIA:
+ case ARM::VLDMSIA:
+ *AddrIdx = 0;
+ break;
+ // Instructions with base address register in position 1...
+ case ARM::LDMIA_UPD: // same reg at position 0 and position 1
+ case ARM::LDMDA_UPD:
+ case ARM::LDMDB_UPD:
+ case ARM::LDMIB_UPD:
+
+ case ARM::LDRSB:
+ case ARM::LDRH:
+ case ARM::LDRSH:
+
+ case ARM::LDRi12:
+ case ARM::LDRrs:
+ case ARM::LDRBi12:
+ case ARM::LDRBrs:
+ case ARM::VLDMDIA_UPD:
+ case ARM::VLDMDDB_UPD:
+ case ARM::VLDMSIA_UPD:
+ case ARM::VLDMSDB_UPD:
+ case ARM::VLDRS:
+ case ARM::VLDRD:
+
+ case ARM::LDREX:
+ case ARM::LDREXB:
+ case ARM::LDREXH:
+ *AddrIdx = 1;
+ break;
+
+ // Instructions with base address register in position 2...
+ case ARM::LDR_PRE_REG:
+ case ARM::LDR_PRE_IMM:
+ case ARM::LDR_POST_REG:
+ case ARM::LDR_POST_IMM:
+
+ case ARM::LDRB_PRE_REG:
+ case ARM::LDRB_PRE_IMM:
+ case ARM::LDRB_POST_REG:
+ case ARM::LDRB_POST_IMM:
+ case ARM::LDRSB_PRE:
+ case ARM::LDRSB_POST:
+
+ case ARM::LDRH_PRE:
+ case ARM::LDRH_POST:
+ case ARM::LDRSH_PRE:
+ case ARM::LDRSH_POST:
+
+ case ARM::LDRD:
+ *AddrIdx = 2;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Sandboxes a memory reference instruction by inserting an appropriate mask
+ * or check operation before it.
+ */
+void ARMNaClRewritePass::SandboxMemory(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx,
+ bool CPSRLive,
+ bool IsLoad) {
+ MachineOperand &Addr = MI.getOperand(AddrIdx);
+
+ if (!CPSRLive && TryPredicating(MI, ARMCC::EQ)) {
+ /*
+ * For unconditional memory references where CPSR is not in use, we can use
+ * a faster sandboxing sequence by predicating the load/store -- assuming we
+ * *can* predicate the load/store.
+ */
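+    // For example (illustrative): under this scheme "str r0, [r1]" is
+    // guarded as
+    //   tst   r1, #0xc0000000
+    //   streq r0, [r1]
+    // so the store only executes when the address lies inside the sandbox.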
+
+ // TODO(sehr): add SFI_GUARD_SP_LOAD_TST.
+ // Instruction can be predicated -- use the new sandbox.
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::SFI_GUARD_LOADSTORE_TST))
+ .addOperand(Addr) // rD
+ .addReg(0); // apparently unused source register?
+ } else {
+ unsigned Opcode;
+ if (IsLoad && (MI.getOperand(0).getReg() == ARM::SP)) {
+ Opcode = ARM::SFI_GUARD_SP_LOAD;
+ } else {
+ Opcode = ARM::SFI_GUARD_LOADSTORE;
+ }
+ // Use the older BIC sandbox, which is universal, but incurs a stall.
+ ARMCC::CondCodes Pred = TII->getPredicate(&MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
+ .addOperand(Addr) // rD
+ .addReg(0) // apparently unused source register?
+ .addImm((int64_t) Pred) // predicate condition
+ .addReg(ARM::CPSR); // predicate source register (CPSR)
+
+ /*
+ * This pseudo-instruction is intended to generate something resembling the
+ * following, but with alignment enforced.
+ * TODO(cbiffle): move alignment into this function, use the code below.
+ *
+ * // bic<cc> Addr, Addr, #0xC0000000
+ * BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ * TII->get(ARM::BICri))
+ * .addOperand(Addr) // rD
+ * .addOperand(Addr) // rN
+ * .addImm(0xC0000000) // imm
+ * .addImm((int64_t) Pred) // predicate condition
+ * .addReg(ARM::CPSR) // predicate source register (CPSR)
+ * .addReg(0); // flag output register (0 == no flags)
+ */
+ }
+}
+
+static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 0...
+ case ARM::STMIA:
+ case ARM::STMDA:
+ case ARM::STMDB:
+ case ARM::STMIB:
+
+ case ARM::VSTMDIA:
+ case ARM::VSTMSIA:
+ *AddrIdx = 0;
+ break;
+
+ // Instructions with base address register in position 1...
+ case ARM::STMIA_UPD: // same reg at position 0 and position 1
+ case ARM::STMDA_UPD:
+ case ARM::STMDB_UPD:
+ case ARM::STMIB_UPD:
+
+ case ARM::STRH:
+ case ARM::STRi12:
+ case ARM::STRrs:
+ case ARM::STRBi12:
+ case ARM::STRBrs:
+ case ARM::VSTMDIA_UPD:
+ case ARM::VSTMDDB_UPD:
+ case ARM::VSTMSIA_UPD:
+ case ARM::VSTMSDB_UPD:
+ case ARM::VSTRS:
+ case ARM::VSTRD:
+ *AddrIdx = 1;
+ break;
+
+ // Instructions with base address register in position 2...
+ case ARM::STR_PRE_REG:
+ case ARM::STR_PRE_IMM:
+ case ARM::STR_POST_REG:
+ case ARM::STR_POST_IMM:
+
+ case ARM::STRB_PRE_REG:
+ case ARM::STRB_PRE_IMM:
+ case ARM::STRB_POST_REG:
+ case ARM::STRB_POST_IMM:
+
+ case ARM::STRH_PRE:
+ case ARM::STRH_POST:
+
+ case ARM::STRD:
+ case ARM::STREX:
+ case ARM::STREXB:
+ case ARM::STREXH:
+ *AddrIdx = 2;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+bool ARMNaClRewritePass::SandboxMemoryReferencesInBlock(
+ MachineBasicBlock &MBB) {
+ /*
+ * This is a simple local reverse-dataflow analysis to determine where CPSR
+ * is live. We cannot use the conditional store sequence anywhere that CPSR
+ * is live, or we'd affect correctness. The existing liveness analysis passes
+ * barf when applied pre-emit, after allocation, so we must do it ourselves.
+ */
+
+ // LOCALMOD(pdox): Short-circuit this function. Assume CPSR is always live,
+ // until we figure out why the assert is tripping.
+ bool Modified2 = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) {
+ bool CPSRLive = true;
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, true);
+ Modified2 = true;
+ }
+ if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) {
+ bool CPSRLive = true;
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, false);
+ Modified2 = true;
+ }
+ }
+ return Modified2;
+ // END LOCALMOD(pdox)
+
+ bool CPSRLive = IsCPSRLiveOut(MBB);
+
+ // Given that, record which instructions should not be altered to trash CPSR:
+ std::set<const MachineInstr *> InstrsWhereCPSRLives;
+ for (MachineBasicBlock::const_reverse_iterator MBBI = MBB.rbegin(),
+ E = MBB.rend();
+ MBBI != E;
+ ++MBBI) {
+ const MachineInstr &MI = *MBBI;
+ // Check for kills first.
+ if (MI.modifiesRegister(ARM::CPSR, TRI)) CPSRLive = false;
+ // Then check for uses.
+ if (MI.readsRegister(ARM::CPSR)) CPSRLive = true;
+
+ if (CPSRLive) InstrsWhereCPSRLives.insert(&MI);
+ }
+
+ // Sanity check:
+ assert(CPSRLive == MBB.isLiveIn(ARM::CPSR)
+ && "CPSR Liveness analysis does not match cached live-in result.");
+
+ // Now: find and sandbox stores.
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) {
+ bool CPSRLive =
+ (InstrsWhereCPSRLives.find(&MI) != InstrsWhereCPSRLives.end());
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, true);
+ Modified = true;
+ }
+ if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) {
+ bool CPSRLive =
+ (InstrsWhereCPSRLives.find(&MI) != InstrsWhereCPSRLives.end());
+ SandboxMemory(MBB, MBBI, MI, AddrIdx, CPSRLive, false);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+/**********************************************************************/
+
+bool ARMNaClRewritePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ TRI = MF.getTarget().getRegisterInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+
+ if (MBB.hasAddressTaken()) {
+ //FIXME: use symbolic constant or get this value from some configuration
+ MBB.setAlignment(4);
+ Modified = true;
+ }
+
+ if (FlagSfiLoad || FlagSfiStore)
+ Modified |= SandboxMemoryReferencesInBlock(MBB);
+ if (FlagSfiBranch) Modified |= SandboxBranchesInBlock(MBB);
+ if (FlagSfiStack) Modified |= SandboxStackChangesInBlock(MBB);
+ }
+ DEBUG(LightweightVerify(MF));
+ return Modified;
+}
+
+/// createARMNaClRewritePass - returns an instance of the NaClRewritePass.
+FunctionPass *llvm::createARMNaClRewritePass() {
+ return new ARMNaClRewritePass();
+}
diff --git a/lib/Target/ARM/ARMNaClRewritePass.h b/lib/Target/ARM/ARMNaClRewritePass.h
new file mode 100644
index 0000000000..c8854a54fc
--- /dev/null
+++ b/lib/Target/ARM/ARMNaClRewritePass.h
@@ -0,0 +1,36 @@
+//===-- ARMNaClRewritePass.h - NaCl Sandboxing Pass ------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARMNACLREWRITEPASS_H
+#define TARGET_ARMNACLREWRITEPASS_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ extern cl::opt<bool> FlagSfiZeroMask;
+ extern cl::opt<bool> FlagSfiData;
+ extern cl::opt<bool> FlagSfiLoad;
+ extern cl::opt<bool> FlagSfiStore;
+ extern cl::opt<bool> FlagSfiStack;
+ extern cl::opt<bool> FlagSfiBranch;
+}
+
+namespace ARM_SFI {
+
+bool IsStackChange(const llvm::MachineInstr &MI,
+ const llvm::TargetRegisterInfo *TRI);
+bool IsSandboxedStackChange(const llvm::MachineInstr &MI);
+bool NeedSandboxStackChange(const llvm::MachineInstr &MI,
+ const llvm::TargetRegisterInfo *TRI);
+
+} // namespace ARM_SFI
+
+#endif
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
new file mode 100644
index 0000000000..ce68d4d92b
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp
@@ -0,0 +1,329 @@
+//=== ARMMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-mc-nacl"
+
+#include "MCTargetDesc/ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCExpr.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace llvm {
+ cl::opt<bool> FlagSfiZeroMask("sfi-zero-mask");
+}
+
+/// Two helper functions for emitting the actual guard instructions
+
+static void EmitBICMask(MCStreamer &Out,
+ unsigned Addr, int64_t Pred, unsigned Mask) {
+ // bic\Pred \Addr, \Addr, #Mask
+ MCInst BICInst;
+ BICInst.setOpcode(ARM::BICri);
+ BICInst.addOperand(MCOperand::CreateReg(Addr)); // rD
+ BICInst.addOperand(MCOperand::CreateReg(Addr)); // rS
+ if (FlagSfiZeroMask) {
+ BICInst.addOperand(MCOperand::CreateImm(0)); // imm
+ } else {
+ BICInst.addOperand(MCOperand::CreateImm(Mask)); // imm
+ }
+ BICInst.addOperand(MCOperand::CreateImm(Pred)); // predicate
+ BICInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); // CPSR
+ BICInst.addOperand(MCOperand::CreateReg(0)); // flag out
+ Out.EmitInstruction(BICInst);
+}
+
+static void EmitTST(MCStreamer &Out, unsigned Reg) {
+ // tst \reg, #\MASK typically 0xc0000000
+ const unsigned Mask = 0xC0000000;
+ MCInst TSTInst;
+ TSTInst.setOpcode(ARM::TSTri);
+ TSTInst.addOperand(MCOperand::CreateReg(Reg)); // rS
+ if (FlagSfiZeroMask) {
+ TSTInst.addOperand(MCOperand::CreateImm(0)); // imm
+ } else {
+ TSTInst.addOperand(MCOperand::CreateImm(Mask)); // imm
+ }
+ TSTInst.addOperand(MCOperand::CreateImm((int64_t)ARMCC::AL)); // Always
+  TSTInst.addOperand(MCOperand::CreateReg(0));  // predicate register (none)
+  Out.EmitInstruction(TSTInst);
+}
+
+
+// This is ONLY used for sandboxing stack changes.
+// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that
+// it must ensure that the two instructions are in the same bundle.
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always
+// emitted in conjunction with a SFI_DATA_MASK
+//
+static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) {
+ assert(I == 3 &&
+ (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) &&
+ (ARM::SFI_DATA_MASK == Saved[2].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+
+ unsigned Addr = Saved[2].getOperand(0).getReg();
+ int64_t Pred = Saved[2].getOperand(2).getImm();
+ assert((ARM::SP == Addr) && "Unexpected register at stack guard");
+
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ EmitBICMask(Out, Addr, Pred, 0xC0000000);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitDirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_call_preamble cond=
+ // sfi_nops_to_force_slot3
+ assert(I == 2 && (ARM::SFI_GUARD_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_indirect_call_preamble link cond=
+ // sfi_nops_to_force_slot2
+ // sfi_code_mask \link \cond
+  assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) &&
+         "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_CALL");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ int64_t Pred = Saved[0].getOperand(2).getImm();
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ EmitBICMask(Out, Reg, Pred, 0xC000000F);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_indirect_jump_preamble link cond=
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \link \cond
+  assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) &&
+         "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_JMP");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ int64_t Pred = Saved[0].getOperand(2).getImm();
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, Reg, Pred, 0xC000000F);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_return_preamble reg cond=
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \reg \cond
+ assert(I == 2 && (ARM::SFI_GUARD_RETURN == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN");
+ int64_t Pred = Saved[0].getOperand(0).getImm();
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, ARM::LR, Pred, 0xC000000F);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) {
+  // sfi_load_store_preamble reg cond --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_data_mask \reg, \cond
+  assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) &&
+         "Unexpected SFI Pseudo while lowering SFI_GUARD_LOADSTORE");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ int64_t Pred = Saved[0].getOperand(2).getImm();
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, Reg, Pred, 0xC0000000);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardLoadOrStoreTst(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_cstore_preamble reg -->
+ // sfi_nop_if_at_bundle_end
+ // sfi_data_tst \reg
+ assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE_TST == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+
+ Out.EmitBundleLock();
+ EmitTST(Out, Reg);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+// This is ONLY used for loads into the stack pointer.
+static void EmitGuardSpLoad(int I, MCInst Saved[], MCStreamer &Out) {
+ assert(I == 4 &&
+ (ARM::SFI_GUARD_SP_LOAD == Saved[0].getOpcode()) &&
+ (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[1].getOpcode()) &&
+ (ARM::SFI_DATA_MASK == Saved[3].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+
+ unsigned AddrReg = Saved[0].getOperand(0).getReg();
+ unsigned SpReg = Saved[3].getOperand(0).getReg();
+ int64_t Pred = Saved[3].getOperand(2).getImm();
+ assert((ARM::SP == SpReg) && "Unexpected register at stack guard");
+
+ Out.EmitBundleLock();
+ EmitBICMask(Out, AddrReg, Pred, 0xC0000000);
+ Out.EmitInstruction(Saved[2]);
+ EmitBICMask(Out, SpReg, Pred, 0xC0000000);
+ Out.EmitBundleUnlock();
+}
+
+namespace llvm {
+// CustomExpandInstNaClARM -
+// If Inst is a NaCl pseudo instruction, emits the substitute
+// expansion to the MCStreamer and returns true.
+// Otherwise, returns false.
+//
+// NOTE: Each time this function calls Out.EmitInstruction(), it will be
+// called again recursively to rewrite the new instruction being emitted.
+// Care must be taken to ensure that this does not result in an infinite
+// loop. Also, global state must be managed carefully so that it is
+// consistent during recursive calls.
+//
+// We need global state to keep track of the explicit prefix (PREFIX_*)
+// instructions. Unfortunately, the assembly parser prefers to generate
+// these instead of combined instructions. At this time, having only
+// one explicit prefix is supported.
+
+
+bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out) {
+ const int MaxSaved = 4;
+ static MCInst Saved[MaxSaved];
+ static int SaveCount = 0;
+ static int I = 0;
+ // This routine only executes if RecurseGuard == 0
+ static bool RecurseGuard = false;
+
+ // If we are emitting to .s, just emit all pseudo-instructions directly.
+ if (Out.hasRawTextSupport()) {
+ return false;
+ }
+
+  // No recursive calls allowed.
+ if (RecurseGuard) return false;
+
+ unsigned Opc = Inst.getOpcode();
+
+ DEBUG(dbgs() << "CustomExpandInstNaClARM("; Inst.dump(); dbgs() << ")\n");
+
+ // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of
+ // a stack guard in conjunction with a SFI_DATA_MASK
+
+ // Logic:
+ // This is somewhat convoluted, but in the current model, the SFI
+ // guard pseudo instructions occur PRIOR to the actual instruction.
+ // So, the bundling/alignment operation has to refer to the FOLLOWING
+ // one or two instructions.
+ //
+ // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_*
+ // pseudo and the very next one or two instructions are used as arguments to
+ // the Emit*() functions in this file. This is the reason why we have a
+ // doublely nested switch here. First, to save the SFI_* pseudo, then to
+ // emit it and the next instruction
+
+ // By default, we only need to save two or three instructions
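+  //
+  // For example (illustrative): a guarded return arrives as the pair
+  //   SFI_GUARD_RETURN, BX_RET
+  // The first call saves SFI_GUARD_RETURN (SaveCount becomes 2) and
+  // returns true; the second call saves BX_RET and then EmitGuardReturn()
+  // emits the bundle-locked "bic lr, lr, #0xc000000f" followed by the
+  // return itself.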
+
+ if ((I == 0) && (SaveCount == 0)) {
+ // Base State, no saved instructions.
+ // If the current instruction is a SFI instruction, set the SaveCount
+ // and fall through.
+ switch (Opc) {
+ default:
+ SaveCount = 0; // Nothing to do.
+ return false; // Handle this Inst elsewhere.
+ case ARM::SFI_NOP_IF_AT_BUNDLE_END:
+ SaveCount = 3;
+ break;
+ case ARM::SFI_DATA_MASK:
+ SaveCount = 0; // Do nothing.
+ break;
+ case ARM::SFI_GUARD_CALL:
+ case ARM::SFI_GUARD_INDIRECT_CALL:
+ case ARM::SFI_GUARD_INDIRECT_JMP:
+ case ARM::SFI_GUARD_RETURN:
+ case ARM::SFI_GUARD_LOADSTORE:
+ case ARM::SFI_GUARD_LOADSTORE_TST:
+ SaveCount = 2;
+ break;
+ case ARM::SFI_GUARD_SP_LOAD:
+ SaveCount = 4;
+ break;
+ }
+ }
+
+ if (I < SaveCount) {
+    // Otherwise, save the current Inst and return.
+ Saved[I++] = Inst;
+ if (I < SaveCount)
+ return true;
+    // Else fall through to the next stage.
+ }
+
+ if (SaveCount > 0) {
+    assert(I == SaveCount && "Bookkeeping Error");
+ SaveCount = 0; // Reset for next iteration
+ // The following calls may call Out.EmitInstruction()
+ // which must not again call CustomExpandInst ...
+ // So set RecurseGuard = 1;
+ RecurseGuard = true;
+
+ switch (Saved[0].getOpcode()) {
+ default: /* No action required */ break;
+ case ARM::SFI_NOP_IF_AT_BUNDLE_END:
+ EmitDataMask(I, Saved, Out);
+ break;
+ case ARM::SFI_DATA_MASK:
+      assert(0 && "Unexpected SFI_DATA_MASK as a Saved Inst");
+ break;
+ case ARM::SFI_GUARD_CALL:
+ EmitDirectGuardCall(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_INDIRECT_CALL:
+ EmitIndirectGuardCall(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_INDIRECT_JMP:
+ EmitIndirectGuardJmp(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_RETURN:
+ EmitGuardReturn(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_LOADSTORE:
+ EmitGuardLoadOrStore(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_LOADSTORE_TST:
+ EmitGuardLoadOrStoreTst(I, Saved, Out);
+ break;
+ case ARM::SFI_GUARD_SP_LOAD:
+ EmitGuardSpLoad(I, Saved, Out);
+ break;
+ }
+ I = 0; // Reset I for next.
+ assert(RecurseGuard && "Illegal Depth");
+ RecurseGuard = false;
+ return true;
+ }
+
+ return false;
+}
+
+} // namespace llvm
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h
new file mode 100644
index 0000000000..de7ed50662
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h
@@ -0,0 +1,19 @@
+//===-- ARMMCNaCl.h - Prototype for CustomExpandInstNaClARM ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCNACL_H
+#define ARMMCNACL_H
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+ bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out);
+}
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
new file mode 100644
index 0000000000..d39a60d41c
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp
@@ -0,0 +1,261 @@
+//=== MipsMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mips-mc-nacl"
+
+#include "MCTargetDesc/MipsBaseInfo.h"
+#include "MCTargetDesc/MipsMCTargetDesc.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+/// Helper function for emitting the actual guard (mask) instruction
+
+static void EmitMask(MCStreamer &Out,
+ unsigned Addr, unsigned Mask) {
+ // and \Addr, \Addr, \Mask
+ MCInst MaskInst;
+ MaskInst.setOpcode(Mips::AND);
+ MaskInst.addOperand(MCOperand::CreateReg(Addr));
+ MaskInst.addOperand(MCOperand::CreateReg(Addr));
+ MaskInst.addOperand(MCOperand::CreateReg(Mask));
+ Out.EmitInstruction(MaskInst);
+}
+
+// This is ONLY used for sandboxing stack changes.
+// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that
+// it must ensure that the two instructions are in the same bundle.
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always
+// emitted in conjunction with a SFI_DATA_MASK
+//
+static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) {
+ assert(I == 3 &&
+ (Mips::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) &&
+ (Mips::SFI_DATA_MASK == Saved[2].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering");
+
+ unsigned Addr = Saved[2].getOperand(0).getReg();
+ unsigned Mask = Saved[2].getOperand(2).getReg();
+ assert((Mips::SP == Addr) && "Unexpected register at stack guard");
+
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ EmitMask(Out, Addr, Mask);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitDirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_call_preamble --->
+ // sfi_nops_to_force_slot2
+ assert(I == 3 && (Mips::SFI_GUARD_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL");
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitInstruction(Saved[2]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardCall(int I, MCInst Saved[],
+ MCStreamer &Out) {
+ // sfi_indirect_call_preamble link --->
+ // sfi_nops_to_force_slot1
+ // sfi_code_mask \link \link \maskreg
+ assert(I == 3 && (Mips::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_CALL");
+
+ unsigned Addr = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+ EmitMask(Out, Addr, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitInstruction(Saved[2]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_indirect_jump_preamble link --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \link \link \maskreg
+ assert(I == 2 && (Mips::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_JMP");
+ unsigned Addr = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleLock();
+ EmitMask(Out, Addr, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_return_preamble reg --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_code_mask \reg \reg \maskreg
+ assert(I == 2 && (Mips::SFI_GUARD_RETURN == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleLock();
+ EmitMask(Out, Reg, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) {
+ // sfi_load_store_preamble reg --->
+ // sfi_nop_if_at_bundle_end
+ // sfi_data_mask \reg \reg \maskreg
+ assert(I == 2 && (Mips::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) &&
+ "Unexpected SFI Pseudo while lowering SFI_GUARD_LOADSTORE");
+ unsigned Reg = Saved[0].getOperand(0).getReg();
+ unsigned Mask = Saved[0].getOperand(2).getReg();
+
+ Out.EmitBundleLock();
+ EmitMask(Out, Reg, Mask);
+ Out.EmitInstruction(Saved[1]);
+ Out.EmitBundleUnlock();
+}
+
+namespace llvm {
+// CustomExpandInstNaClMips -
+// If Inst is a NaCl pseudo instruction, emits the substitute
+// expansion to the MCStreamer and returns true.
+// Otherwise, returns false.
+//
+// NOTE: Each time this function calls Out.EmitInstruction(), it will be
+// called again recursively to rewrite the new instruction being emitted.
+// Care must be taken to ensure that this does not result in an infinite
+// loop. Also, global state must be managed carefully so that it is
+// consistent during recursive calls.
+//
+// We need global state to keep track of the explicit prefix (PREFIX_*)
+// instructions. Unfortunately, the assembly parser prefers to generate
+// these instead of combined instructions. At this time, having only
+// one explicit prefix is supported.
+
+
+bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out) {
+ const int MaxSaved = 4;
+ static MCInst Saved[MaxSaved];
+ static int SaveCount = 0;
+ static int I = 0;
+ // This routine only executes if RecurseGuard == 0
+ static bool RecurseGuard = false;
+
+ // If we are emitting to .s, just emit all pseudo-instructions directly.
+ if (Out.hasRawTextSupport()) {
+ return false;
+ }
+
+  // No recursive calls allowed.
+ if (RecurseGuard) return false;
+
+ unsigned Opc = Inst.getOpcode();
+
+ DEBUG(dbgs() << "CustomExpandInstNaClMips("; Inst.dump(); dbgs() << ")\n");
+
+ // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of
+ // a stack guard in conjunction with a SFI_DATA_MASK
+
+ // Logic:
+ // This is somewhat convoluted, but in the current model, the SFI
+ // guard pseudo instructions occur PRIOR to the actual instruction.
+ // So, the bundling/alignment operation has to refer to the FOLLOWING
+ // one or two instructions.
+ //
+ // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_*
+ // pseudo and the very next one or two instructions are used as arguments to
+ // the Emit*() functions in this file. This is the reason why we have a
+  // doubly nested switch here: first to save the SFI_* pseudo, then to
+  // emit it and the next instruction.
+
+ // By default, we only need to save two or three instructions
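+  //
+  // For example (illustrative): an indirect call arrives as the triple
+  //   SFI_GUARD_INDIRECT_CALL, JALR, <delay-slot instruction>
+  // which is why SaveCount is 3 for it below; once all three are saved,
+  // EmitIndirectGuardCall() masks the target register and emits the call
+  // and its delay slot inside one bundle-locked, end-aligned group.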
+
+ if ((I == 0) && (SaveCount == 0)) {
+ // Base State, no saved instructions.
+ // If the current instruction is a SFI instruction, set the SaveCount
+ // and fall through.
+ switch (Opc) {
+ default:
+ SaveCount = 0; // Nothing to do.
+ return false; // Handle this Inst elsewhere.
+ case Mips::SFI_NOP_IF_AT_BUNDLE_END:
+ case Mips::SFI_GUARD_CALL:
+ case Mips::SFI_GUARD_INDIRECT_CALL:
+ SaveCount = 3;
+ break;
+ case Mips::SFI_DATA_MASK:
+ SaveCount = 0; // Do nothing.
+ break;
+ case Mips::SFI_GUARD_INDIRECT_JMP:
+ case Mips::SFI_GUARD_RETURN:
+ case Mips::SFI_GUARD_LOADSTORE:
+ SaveCount = 2;
+ break;
+ }
+ }
+
+ if (I < SaveCount) {
+    // Otherwise, save the current Inst and return.
+ Saved[I++] = Inst;
+ if (I < SaveCount)
+ return true;
+    // Else fall through to the next stage.
+ }
+
+ if (SaveCount > 0) {
+    assert(I == SaveCount && "Bookkeeping Error");
+ SaveCount = 0; // Reset for next iteration
+ // The following calls may call Out.EmitInstruction()
+ // which must not again call CustomExpandInst ...
+ // So set RecurseGuard = 1;
+ RecurseGuard = true;
+
+ switch (Saved[0].getOpcode()) {
+ default: /* No action required */ break;
+ case Mips::SFI_NOP_IF_AT_BUNDLE_END:
+ EmitDataMask(I, Saved, Out);
+ break;
+ case Mips::SFI_DATA_MASK:
+      assert(0 && "Unexpected SFI_DATA_MASK as a Saved Inst");
+ break;
+ case Mips::SFI_GUARD_CALL:
+ EmitDirectGuardCall(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_INDIRECT_CALL:
+ EmitIndirectGuardCall(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_INDIRECT_JMP:
+ EmitIndirectGuardJmp(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_RETURN:
+ EmitGuardReturn(I, Saved, Out);
+ break;
+ case Mips::SFI_GUARD_LOADSTORE:
+ EmitGuardLoadOrStore(I, Saved, Out);
+ break;
+ }
+ I = 0; // Reset I for next.
+ assert(RecurseGuard && "Illegal Depth");
+ RecurseGuard = false;
+ return true;
+ }
+ return false;
+}
+
+} // namespace llvm
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
new file mode 100644
index 0000000000..c90502ec33
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -0,0 +1,19 @@
+//===-- MipsMCNaCl.h - Prototype for CustomExpandInstNaClMips ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCNACL_H
+#define MIPSMCNACL_H
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+ bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out);
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsNaClHeaders.cpp b/lib/Target/Mips/MipsNaClHeaders.cpp
new file mode 100644
index 0000000000..375c287d67
--- /dev/null
+++ b/lib/Target/Mips/MipsNaClHeaders.cpp
@@ -0,0 +1,128 @@
+//===-- MipsNaClHeaders.cpp - Print SFI headers to an Mips .s file --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the initial header string needed
+// for the Native Client target in Mips assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/raw_ostream.h"
+#include "MipsNaClRewritePass.h"
+#include <string>
+
+using namespace llvm;
+
+void EmitMipsSFIHeaders(raw_ostream &O) {
+ O << " # ========================================\n";
+ O << "# Branch: " << FlagSfiBranch << "\n";
+ O << "# Stack: " << FlagSfiStack << "\n";
+ O << "# Store: " << FlagSfiStore << "\n";
+ O << "# Load: " << FlagSfiLoad << "\n";
+
+ O << " # ========================================\n";
+  // NOTE: this macro does bundle alignment as follows:
+  // if the current bundle position is 4*N, emit pN data items of value "val"
+  // (pos will be one of 0, 4, 8, 12).
+ //
+ O <<
+ "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n"
+ "\t.set pos, (. - XmagicX) % 16\n"
+ "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nop_if_at_bundle_end\n"
+ "\tsfi_long_based_on_pos 0 0 0 1 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot3\n"
+ "\tsfi_long_based_on_pos 3 2 1 0 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot2\n"
+ "\tsfi_long_based_on_pos 2 1 0 3 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_nops_to_force_slot1\n"
+ "\tsfi_long_based_on_pos 1 0 3 2 0x00000000\n"
+ "\t.endm\n"
+ "\n\n";
+
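+  // Worked example (illustrative): at bundle position 8,
+  // "sfi_nops_to_force_slot3" (arguments 3 2 1 0) computes
+  // ((0<<12)|(1<<8)|(2<<4)|3) >> 8 & 15 = 1, so .fill emits one 4-byte
+  // 0x00000000 (a MIPS nop) and the next instruction lands in slot 3
+  // (offset 12) of the 16-byte bundle.
+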
+ O << " # ========================================\n";
+ O <<
+ "\t.macro sfi_data_mask reg1 reg2 maskreg\n"
+ "\tand \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_code_mask reg1 reg2 maskreg\n"
+ "\tand \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O << " # ========================================\n";
+ if (FlagSfiBranch) {
+ O <<
+ "\t.macro sfi_call_preamble\n"
+ "\tsfi_nops_to_force_slot2\n"
+ "\t.endm\n"
+ "\n\n";
+
+ O <<
+ "\t.macro sfi_return_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ // This is used just before "jr"
+ O <<
+ "\t.macro sfi_indirect_jump_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ // This is used just before "jalr"
+ O <<
+ "\t.macro sfi_indirect_call_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nops_to_force_slot1\n"
+ "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+
+ }
+
+ if (FlagSfiStore) {
+ O << " # ========================================\n";
+
+ O <<
+ "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n"
+ "\tsfi_nop_if_at_bundle_end\n"
+ "\tsfi_data_mask \\reg1, \\reg2 , \\maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+ } else {
+ O <<
+ "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n"
+ "\t.endm\n"
+ "\n\n";
+ }
+
+ O << " # ========================================\n";
+ O << "\t.text\n";
+}
diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp
new file mode 100644
index 0000000000..cce770eebd
--- /dev/null
+++ b/lib/Target/Mips/MipsNaClRewritePass.cpp
@@ -0,0 +1,333 @@
+//===-- MipsNaClRewritePass.cpp - Native Client Rewrite Pass -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Native Client Rewrite Pass
+// This final pass inserts the sandboxing instructions needed to run inside
+// the Native Client sandbox. Native Client requires certain software fault
+// isolation (SFI) constructions to be put in place, to prevent escape from
+// the sandbox. Native Client refuses to execute binaries without the correct
+// SFI sequences.
+//
+// Potentially dangerous operations which are protected include:
+// * Stores
+// * Branches
+// * Changes to SP
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mips-sfi"
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsNaClRewritePass.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+unsigned Mips::IndirectBranchMaskReg = Mips::T6;
+unsigned Mips::LoadStoreStackMaskReg = Mips::T7;
+
+namespace {
+ class MipsNaClRewritePass : public MachineFunctionPass {
+ public:
+ static char ID;
+ MipsNaClRewritePass() : MachineFunctionPass(ID) {}
+
+ const MipsInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "Mips Native Client Rewrite Pass";
+ }
+
+ private:
+
+ bool SandboxLoadsInBlock(MachineBasicBlock &MBB);
+ bool SandboxStoresInBlock(MachineBasicBlock &MBB);
+ void SandboxLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx);
+
+ bool SandboxBranchesInBlock(MachineBasicBlock &MBB);
+ bool SandboxStackChangesInBlock(MachineBasicBlock &MBB);
+
+ void SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ void AlignAllJumpTargets(MachineFunction &MF);
+ };
+ char MipsNaClRewritePass::ID = 0;
+}
+
+static bool IsReturn(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::RET);
+}
+
+static bool IsIndirectJump(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::JR);
+}
+
+static bool IsIndirectCall(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::JALR);
+}
+
+static bool IsDirectCall(const MachineInstr &MI) {
+ return ((MI.getOpcode() == Mips::JAL) || (MI.getOpcode() == Mips::BGEZAL)
+ || (MI.getOpcode() == Mips::BLTZAL));
+}
+
+static bool IsStackMask(const MachineInstr &MI) {
+ return (MI.getOpcode() == Mips::SFI_DATA_MASK);
+}
+
+static bool NeedSandboxStackChange(const MachineInstr &MI,
+ const TargetRegisterInfo *TRI) {
+ if (IsDirectCall(MI) || IsIndirectCall(MI)) {
+ // We check this first because method modifiesRegister
+ // returns true for calls.
+ return false;
+ }
+ return (MI.modifiesRegister(Mips::SP, TRI) && !IsStackMask(MI));
+}
+
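+// Same shape as the ARM version: (1) make sure there is room in the bundle
+// for the mask instruction, (2) mask SP (via LoadStoreStackMaskReg) right
+// after the instruction that updated it.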
+void MipsNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_NOP_IF_AT_BUNDLE_END));
+
+ // Get to next instr (one + to get the original, and one more + to get past).
+ MachineBasicBlock::iterator MBBINext = (MBBI++);
+ MachineBasicBlock::iterator MBBINext2 = (MBBI++);
+
+ BuildMI(MBB, MBBINext2, MI.getDebugLoc(),
+ TII->get(Mips::SFI_DATA_MASK), Mips::SP)
+ .addReg(Mips::SP)
+ .addReg(Mips::LoadStoreStackMaskReg);
+ return;
+}
+
+bool MipsNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ if (NeedSandboxStackChange(MI, TRI)) {
+ SandboxStackChange(MBB, MBBI);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+bool MipsNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ if (IsReturn(MI)) {
+ unsigned AddrReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_RETURN), AddrReg)
+ .addReg(AddrReg)
+ .addReg(Mips::IndirectBranchMaskReg);
+ Modified = true;
+ } else if (IsIndirectJump(MI)) {
+ unsigned AddrReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_INDIRECT_JMP), AddrReg)
+ .addReg(AddrReg)
+ .addReg(Mips::IndirectBranchMaskReg);
+ Modified = true;
+ } else if (IsDirectCall(MI)) {
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_CALL));
+ Modified = true;
+ } else if (IsIndirectCall(MI)) {
+ unsigned AddrReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_INDIRECT_CALL), AddrReg)
+ .addReg(AddrReg)
+ .addReg(Mips::IndirectBranchMaskReg);
+ Modified = true;
+ }
+ }
+
+ return Modified;
+}
+
+/*
+ * Sandboxes a load or store instruction by inserting an appropriate mask
+ * operation before it.
+ */
+void MipsNaClRewritePass::SandboxLoadStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineInstr &MI,
+ int AddrIdx) {
+ unsigned BaseReg = MI.getOperand(AddrIdx).getReg();
+
+ BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Mips::SFI_GUARD_LOADSTORE), BaseReg)
+ .addReg(BaseReg)
+ .addReg(Mips::LoadStoreStackMaskReg);
+ return;
+}
+
+static bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 1
+ case Mips::LB:
+ case Mips::LBu:
+ case Mips::LH:
+ case Mips::LHu:
+ case Mips::LW:
+ case Mips::LWC1:
+ case Mips::LDC1:
+ case Mips::LL:
+ case Mips::LWL:
+ case Mips::LWR:
+ *AddrIdx = 1;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == Mips::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) {
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: return false;
+
+ // Instructions with base address register in position 1
+ case Mips::SB:
+ case Mips::SH:
+ case Mips::SW:
+ case Mips::SWC1:
+ case Mips::SDC1:
+ case Mips::SWL:
+ case Mips::SWR:
+ *AddrIdx = 1;
+ break;
+
+ case Mips::SC:
+ *AddrIdx = 2;
+ break;
+ }
+
+ if (MI.getOperand(*AddrIdx).getReg() == Mips::SP) {
+ // The contents of SP do not require masking.
+ return false;
+ }
+
+ return true;
+}
+
+bool MipsNaClRewritePass::SandboxLoadsInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (IsDangerousLoad(MI, &AddrIdx)) {
+ SandboxLoadStore(MBB, MBBI, MI, AddrIdx);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+bool MipsNaClRewritePass::SandboxStoresInBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;
+ ++MBBI) {
+ MachineInstr &MI = *MBBI;
+ int AddrIdx;
+
+ if (IsDangerousStore(MI, &AddrIdx)) {
+ SandboxLoadStore(MBB, MBBI, MI, AddrIdx);
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+// Make sure all jump targets are aligned
+void MipsNaClRewritePass::AlignAllJumpTargets(MachineFunction &MF) {
+ // JUMP TABLE TARGETS
+ MachineJumpTableInfo *jt_info = MF.getJumpTableInfo();
+ if (jt_info) {
+ const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables();
+ for (unsigned i=0; i < JT.size(); ++i) {
+ std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs;
+
+ for (unsigned j=0; j < MBBs.size(); ++j) {
+ MBBs[j]->setAlignment(4);
+ }
+ }
+ }
+
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock &MBB = *I;
+ if (MBB.hasAddressTaken())
+ MBB.setAlignment(4);
+ }
+}
+
+bool MipsNaClRewritePass::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo());
+ TRI = MF.getTarget().getRegisterInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+
+ if (FlagSfiLoad)
+ Modified |= SandboxLoadsInBlock(MBB);
+ if (FlagSfiStore)
+ Modified |= SandboxStoresInBlock(MBB);
+ if (FlagSfiBranch)
+ Modified |= SandboxBranchesInBlock(MBB);
+ if (FlagSfiStack)
+ Modified |= SandboxStackChangesInBlock(MBB);
+ }
+
+ if (FlagSfiBranch)
+ AlignAllJumpTargets(MF);
+
+ return Modified;
+}
+
+/// createMipsNaClRewritePass - returns an instance of the NaClRewritePass.
+FunctionPass *llvm::createMipsNaClRewritePass() {
+ return new MipsNaClRewritePass();
+}
diff --git a/lib/Target/Mips/MipsNaClRewritePass.h b/lib/Target/Mips/MipsNaClRewritePass.h
new file mode 100644
index 0000000000..4e729ec985
--- /dev/null
+++ b/lib/Target/Mips/MipsNaClRewritePass.h
@@ -0,0 +1,21 @@
+//===-- MipsNaClRewritePass.h - NaCl Sandboxing Pass ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MIPSNACLREWRITEPASS_H
+#define TARGET_MIPSNACLREWRITEPASS_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+ extern cl::opt<bool> FlagSfiLoad;
+ extern cl::opt<bool> FlagSfiStore;
+ extern cl::opt<bool> FlagSfiStack;
+ extern cl::opt<bool> FlagSfiBranch;
+}
+
+#endif
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
new file mode 100644
index 0000000000..6b42feee68
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
@@ -0,0 +1,803 @@
+//=== X86MCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "x86-sandboxing"
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86MCNaCl.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+// This option makes it possible to override the x86 jmp mask immediate.
+// Setting it to -1 effectively turns masking into a no-op, which helps when
+// linking this code against non-sandboxed libraries (at least for x86-32).
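+// For example, the default mask of -32 (0xffffffe0) clears the low five bits
+// of the branch-target register, forcing indirect control transfers to
+// 32-byte bundle boundaries.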
+cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", cl::init(-32));
+
+static unsigned PrefixSaved = 0;
+static bool PrefixPass = false;
+
+// See the note below where this function is defined.
+namespace llvm {
+unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High=false);
+}
+
+static void EmitDirectCall(const MCOperand &Op, bool Is64Bit,
+ MCStreamer &Out) {
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+
+ MCInst CALLInst;
+ CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
+ CALLInst.addOperand(Op);
+ Out.EmitInstruction(CALLInst);
+ Out.EmitBundleUnlock();
+}
+
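+// Sandboxes an indirect call or jump through a register.  As a rough sketch,
+// a 32-bit indirect jump through %ecx expands to:
+//   .bundle_lock
+//   andl $-32, %ecx          # mask the target to a bundle boundary
+//   jmp *%ecx
+//   .bundle_unlock
+// On x86-64 the masked 32-bit register is additionally added to %r15 (the
+// sandbox base), and calls are placed at the end of a bundle so the return
+// address is bundle-aligned.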
+static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall,
+ MCStreamer &Out) {
+ const int JmpMask = FlagSfiX86JmpMask;
+ const unsigned Reg32 = Op.getReg();
+ const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
+
+ if (IsCall)
+ Out.EmitBundleAlignEnd();
+
+ Out.EmitBundleLock();
+
+ MCInst ANDInst;
+ ANDInst.setOpcode(X86::AND32ri8);
+ ANDInst.addOperand(MCOperand::CreateReg(Reg32));
+ ANDInst.addOperand(MCOperand::CreateReg(Reg32));
+ ANDInst.addOperand(MCOperand::CreateImm(JmpMask));
+ Out.EmitInstruction(ANDInst);
+
+ if (Is64Bit) {
+ MCInst InstADD;
+ InstADD.setOpcode(X86::ADD64rr);
+ InstADD.addOperand(MCOperand::CreateReg(Reg64));
+ InstADD.addOperand(MCOperand::CreateReg(Reg64));
+ InstADD.addOperand(MCOperand::CreateReg(X86::R15));
+ Out.EmitInstruction(InstADD);
+ }
+
+ if (IsCall) {
+ MCInst CALLInst;
+ CALLInst.setOpcode(Is64Bit ? X86::CALL64r : X86::CALL32r);
+ CALLInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32));
+ Out.EmitInstruction(CALLInst);
+ } else {
+ MCInst JMPInst;
+ JMPInst.setOpcode(Is64Bit ? X86::JMP64r : X86::JMP32r);
+ JMPInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32));
+ Out.EmitInstruction(JMPInst);
+ }
+ Out.EmitBundleUnlock();
+}
+
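+// Expands naclret: instead of a bare "ret", pop the return address into
+// %ecx/%rcx, optionally pop the callee-cleared argument bytes (32-bit
+// "ret $imm" only), and return via a sandboxed indirect jump.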
+static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) {
+ MCInst POPInst;
+ POPInst.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r);
+ POPInst.addOperand(MCOperand::CreateReg(Is64Bit ? X86::RCX : X86::ECX));
+ Out.EmitInstruction(POPInst);
+
+ if (AmtOp) {
+ assert(!Is64Bit);
+ MCInst ADDInst;
+ unsigned ADDReg = X86::ESP;
+ ADDInst.setOpcode(X86::ADD32ri);
+ ADDInst.addOperand(MCOperand::CreateReg(ADDReg));
+ ADDInst.addOperand(MCOperand::CreateReg(ADDReg));
+ ADDInst.addOperand(*AmtOp);
+ Out.EmitInstruction(ADDInst);
+ }
+
+ MCInst JMPInst;
+ JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r);
+ JMPInst.addOperand(MCOperand::CreateReg(X86::ECX));
+ Out.EmitInstruction(JMPInst);
+}
+
+static void EmitTrap(bool Is64Bit, MCStreamer &Out) {
+ // Rewrite to:
+ // X86-32: mov $0, 0
+ // X86-64: mov $0, (%r15)
+ unsigned BaseReg = Is64Bit ? X86::R15 : 0;
+ MCInst Tmp;
+ Tmp.setOpcode(X86::MOV32mi);
+ Tmp.addOperand(MCOperand::CreateReg(BaseReg)); // BaseReg
+ Tmp.addOperand(MCOperand::CreateImm(1)); // Scale
+ Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Tmp.addOperand(MCOperand::CreateImm(0)); // Offset
+ Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg
+ Tmp.addOperand(MCOperand::CreateImm(0)); // Value
+
+ Out.EmitInstruction(Tmp);
+}
+
+// Fix a register after being truncated to 32-bits.
+static void EmitRegFix(unsigned Reg64, MCStreamer &Out) {
+ // lea (%rsp, %r15, 1), %rsp
+ MCInst Tmp;
+ Tmp.setOpcode(X86::LEA64r);
+ Tmp.addOperand(MCOperand::CreateReg(Reg64)); // DestReg
+ Tmp.addOperand(MCOperand::CreateReg(Reg64)); // BaseReg
+ Tmp.addOperand(MCOperand::CreateImm(1)); // Scale
+ Tmp.addOperand(MCOperand::CreateReg(X86::R15)); // IndexReg
+ Tmp.addOperand(MCOperand::CreateImm(0)); // Offset
+ Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg
+ Out.EmitInstruction(Tmp);
+}
+
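+// Expands the naclasp/naclssp/naclandsp pseudos: do the 32-bit arithmetic on
+// %esp inside a bundle lock, then restore the upper bits of %rsp by adding
+// the %r15 sandbox base back in (EmitRegFix).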
+static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp,
+ MCStreamer &Out) {
+ Out.EmitBundleLock();
+
+ MCInst Tmp;
+ Tmp.setOpcode(Opc);
+ Tmp.addOperand(MCOperand::CreateReg(X86::RSP));
+ Tmp.addOperand(MCOperand::CreateReg(X86::RSP));
+ Tmp.addOperand(ImmOp);
+ Out.EmitInstruction(Tmp);
+
+ EmitRegFix(X86::RSP, Out);
+ Out.EmitBundleUnlock();
+}
+
+static void EmitSPAdj(const MCOperand &ImmOp, MCStreamer &Out) {
+ Out.EmitBundleLock();
+
+ MCInst Tmp;
+ Tmp.setOpcode(X86::LEA64_32r);
+ Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); // DestReg
+ Tmp.addOperand(MCOperand::CreateReg(X86::RBP)); // BaseReg
+ Tmp.addOperand(MCOperand::CreateImm(1)); // Scale
+ Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Tmp.addOperand(ImmOp); // Offset
+ Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg
+ Out.EmitInstruction(Tmp);
+
+ EmitRegFix(X86::RSP, Out);
+ Out.EmitBundleUnlock();
+}
+
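+// Re-emits a previously swallowed prefix opcode.  PrefixPass guards against
+// the recursive CustomExpandInstNaClX86 call (triggered by EmitInstruction)
+// swallowing the prefix again; the recursive call resets it, which is why the
+// trailing asserts expect it to be false on return.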
+static void EmitPrefix(unsigned Opc, MCStreamer &Out) {
+ assert(PrefixSaved == 0);
+ assert(PrefixPass == false);
+
+ MCInst PrefixInst;
+ PrefixInst.setOpcode(Opc);
+ PrefixPass = true;
+ Out.EmitInstruction(PrefixInst);
+
+ assert(PrefixSaved == 0);
+ assert(PrefixPass == false);
+}
+
+static void EmitMoveRegReg(bool Is64Bit, unsigned ToReg,
+ unsigned FromReg, MCStreamer &Out) {
+ MCInst Move;
+ Move.setOpcode(Is64Bit ? X86::MOV64rr : X86::MOV32rr);
+ Move.addOperand(MCOperand::CreateReg(ToReg));
+ Move.addOperand(MCOperand::CreateReg(FromReg));
+ Out.EmitInstruction(Move);
+}
+
+static void EmitMoveRegImm32(bool Is64Bit, unsigned ToReg,
+                             unsigned Imm32, MCStreamer &Out) {
+  // A 32-bit move suffices here; the only current caller loads a small
+  // constant into a 32-bit register.
+  MCInst MovInst;
+  MovInst.setOpcode(X86::MOV32ri);
+  MovInst.addOperand(MCOperand::CreateReg(ToReg));
+  MovInst.addOperand(MCOperand::CreateImm(Imm32));
+  Out.EmitInstruction(MovInst);
+}
+
+static void EmitCmove(bool Is64Bit, unsigned ToReg,
+ unsigned FromReg, MCStreamer &Out) {
+ MCInst CmovInst;
+ CmovInst.setOpcode(Is64Bit ? X86::CMOVE64rr : X86::CMOVE32rr);
+ CmovInst.addOperand(MCOperand::CreateReg(ToReg));
+ CmovInst.addOperand(MCOperand::CreateReg(ToReg));
+ CmovInst.addOperand(MCOperand::CreateReg(FromReg));
+ Out.EmitInstruction(CmovInst);
+}
+
+static void EmitClearReg(bool Is64Bit, unsigned Reg, MCStreamer &Out) {
+ MCInst Clear;
+ Clear.setOpcode(X86::XOR32rr);
+ Clear.addOperand(MCOperand::CreateReg(Reg));
+ Clear.addOperand(MCOperand::CreateReg(Reg));
+ Clear.addOperand(MCOperand::CreateReg(Reg));
+ Out.EmitInstruction(Clear);
+}
+
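+// Truncate a 64-bit register to its low 32 bits: a 32-bit mov of a register
+// to itself zero-extends, clearing the upper half.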
+static void EmitRegTruncate(unsigned Reg64, MCStreamer &Out) {
+ unsigned Reg32 = getX86SubSuperRegister_(Reg64, MVT::i32);
+ EmitMoveRegReg(false, Reg32, Reg32, Out);
+}
+
+static void EmitPushReg(bool Is64Bit, unsigned FromReg, MCStreamer &Out) {
+ MCInst Push;
+ Push.setOpcode(Is64Bit ? X86::PUSH64r : X86::PUSH32r);
+ Push.addOperand(MCOperand::CreateReg(FromReg));
+ Out.EmitInstruction(Push);
+}
+
+static void EmitPopReg(bool Is64Bit, unsigned ToReg, MCStreamer &Out) {
+ MCInst Pop;
+ Pop.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r);
+ Pop.addOperand(MCOperand::CreateReg(ToReg));
+ Out.EmitInstruction(Pop);
+}
+
+static void EmitLoad(bool Is64Bit,
+ unsigned DestReg,
+ unsigned BaseReg,
+ unsigned Scale,
+ unsigned IndexReg,
+ unsigned Offset,
+ unsigned SegmentReg,
+ MCStreamer &Out) {
+ // Load DestReg from address BaseReg + Scale * IndexReg + Offset
+ MCInst Load;
+ Load.setOpcode(Is64Bit ? X86::MOV64rm : X86::MOV32rm);
+ Load.addOperand(MCOperand::CreateReg(DestReg));
+ Load.addOperand(MCOperand::CreateReg(BaseReg));
+ Load.addOperand(MCOperand::CreateImm(Scale));
+ Load.addOperand(MCOperand::CreateReg(IndexReg));
+ Load.addOperand(MCOperand::CreateImm(Offset));
+ Load.addOperand(MCOperand::CreateReg(SegmentReg));
+ Out.EmitInstruction(Load);
+}
+
+// Utility function for the stores done by setjmp.
+// Creates a store of SrcReg to the address BaseReg + Scale * IndexReg + Offset.
+static void EmitStore(bool Is64Bit,
+ unsigned BaseReg,
+ unsigned Scale,
+ unsigned IndexReg,
+ unsigned Offset,
+ unsigned SegmentReg,
+ unsigned SrcReg,
+ MCStreamer &Out) {
+ // Store SrcReg to address BaseReg + Scale * IndexReg + Offset
+ MCInst Store;
+ Store.setOpcode(Is64Bit ? X86::MOV64mr : X86::MOV32mr);
+ Store.addOperand(MCOperand::CreateReg(BaseReg));
+ Store.addOperand(MCOperand::CreateImm(Scale));
+ Store.addOperand(MCOperand::CreateReg(IndexReg));
+ Store.addOperand(MCOperand::CreateImm(Offset));
+ Store.addOperand(MCOperand::CreateReg(SegmentReg));
+ Store.addOperand(MCOperand::CreateReg(SrcReg));
+ Out.EmitInstruction(Store);
+}
+
+static void EmitAndRegReg(bool Is64Bit, unsigned DestReg,
+ unsigned SrcReg, MCStreamer &Out) {
+ MCInst AndInst;
+ AndInst.setOpcode(X86::AND32rr);
+ AndInst.addOperand(MCOperand::CreateReg(DestReg));
+ AndInst.addOperand(MCOperand::CreateReg(DestReg));
+ AndInst.addOperand(MCOperand::CreateReg(SrcReg));
+ Out.EmitInstruction(AndInst);
+}
+
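+// Scans Inst for the PSEUDO_NACL_SEG marker that tags a %nacl: memory
+// reference.  If found, clears the marker, reports the index register that
+// the caller must truncate to 32 bits, and returns true.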
+static bool SandboxMemoryRef(MCInst *Inst,
+ unsigned *IndexReg,
+ MCStreamer &Out) {
+ for (unsigned i = 0, last = Inst->getNumOperands(); i < last; i++) {
+ if (!Inst->getOperand(i).isReg() ||
+ Inst->getOperand(i).getReg() != X86::PSEUDO_NACL_SEG) {
+ continue;
+ }
+ // Return the index register that will need to be truncated.
+ // The order of operands on a memory reference is always:
+ // (BaseReg, ScaleImm, IndexReg, DisplacementImm, SegmentReg),
+ // So if we found a match for a segment register value, we know that
+ // the index register is exactly two operands prior.
+ *IndexReg = Inst->getOperand(i - 2).getReg();
+ // Remove the PSEUDO_NACL_SEG annotation.
+ Inst->getOperand(i).setReg(0);
+ return true;
+ }
+ return false;
+}
+
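+// Expands nacltlsaddr32 into the bundle-locked
+//   leal $sym@TLSGD, %eax
+//   call ___tls_get_addr@PLT
+// pair; the linker expects the lea and the call to be directly adjacent
+// (see the comment in X86InstrNaCl.td).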
+static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) {
+ Out.EmitBundleAlignEnd();
+ Out.EmitBundleLock();
+
+ MCInst LeaInst;
+ LeaInst.setOpcode(X86::LEA32r);
+ LeaInst.addOperand(MCOperand::CreateReg(X86::EAX)); // DestReg
+ LeaInst.addOperand(Inst.getOperand(0)); // BaseReg
+ LeaInst.addOperand(Inst.getOperand(1)); // Scale
+ LeaInst.addOperand(Inst.getOperand(2)); // IndexReg
+ LeaInst.addOperand(Inst.getOperand(3)); // Offset
+ LeaInst.addOperand(Inst.getOperand(4)); // SegmentReg
+ Out.EmitInstruction(LeaInst);
+
+ MCInst CALLInst;
+ CALLInst.setOpcode(X86::CALLpcrel32);
+ MCContext &context = Out.getContext();
+ const MCSymbolRefExpr *expr =
+ MCSymbolRefExpr::Create(
+ context.GetOrCreateSymbol(StringRef("___tls_get_addr")),
+ MCSymbolRefExpr::VK_PLT, context);
+ CALLInst.addOperand(MCOperand::CreateExpr(expr));
+ Out.EmitInstruction(CALLInst);
+ Out.EmitBundleUnlock();
+}
+
+
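+// Expands the naclrestbp/naclrestsp pseudos: load or copy the new 32-bit
+// value into the register, then add the %r15 sandbox base back into the
+// 64-bit register (EmitRegFix), all inside one bundle lock.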
+static void EmitREST(const MCInst &Inst, unsigned Reg32, bool IsMem, MCStreamer &Out) {
+ unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
+ Out.EmitBundleLock();
+ if (!IsMem) {
+ EmitMoveRegReg(false, Reg32, Inst.getOperand(0).getReg(), Out);
+ } else {
+ unsigned IndexReg;
+ MCInst SandboxedInst = Inst;
+ if (SandboxMemoryRef(&SandboxedInst, &IndexReg, Out)) {
+ EmitRegTruncate(IndexReg, Out);
+ }
+ EmitLoad(false,
+ Reg32,
+ SandboxedInst.getOperand(0).getReg(), // BaseReg
+ SandboxedInst.getOperand(1).getImm(), // Scale
+ SandboxedInst.getOperand(2).getReg(), // IndexReg
+ SandboxedInst.getOperand(3).getImm(), // Offset
+ SandboxedInst.getOperand(4).getReg(), // SegmentReg
+ Out);
+ }
+
+ EmitRegFix(Reg64, Out);
+ Out.EmitBundleUnlock();
+}
+
+// Does the x86 platform-specific work for setjmp.
+// It expects that a pointer to the JMP_BUF is in %ecx/%rdi, and that the
+// return address is in %edx/%rdx.
+// The JMP_BUF is a structure that has the maximum size over all supported
+// architectures.  The callee-saved registers plus [er]ip and [er]sp are
+// stored into the JMP_BUF.
+static void EmitSetjmp(bool Is64Bit, MCStreamer &Out) {
+ unsigned JmpBuf = Is64Bit ? X86::RDI : X86::ECX;
+ unsigned RetAddr = Is64Bit ? X86::RDX : X86::EDX;
+ if (Is64Bit) {
+ unsigned BasePtr = X86::R15;
+ unsigned Segment = X86::PSEUDO_NACL_SEG;
+ // Save the registers.
+ EmitStore(true, BasePtr, 1, JmpBuf, 0, Segment, X86::RBX, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 8, Segment, X86::RBP, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 16, Segment, X86::RSP, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 24, Segment, X86::R12, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 32, Segment, X86::R13, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 40, Segment, X86::R14, Out);
+ EmitStore(true, BasePtr, 1, JmpBuf, 48, Segment, X86::RDX, Out);
+ } else {
+ // Save the registers.
+ EmitStore(false, JmpBuf, 1, 0, 0, 0, X86::EBX, Out);
+ EmitStore(false, JmpBuf, 1, 0, 4, 0, X86::EBP, Out);
+ EmitStore(false, JmpBuf, 1, 0, 8, 0, X86::ESP, Out);
+ EmitStore(false, JmpBuf, 1, 0, 12, 0, X86::ESI, Out);
+ EmitStore(false, JmpBuf, 1, 0, 16, 0, X86::EDI, Out);
+ EmitStore(false, JmpBuf, 1, 0, 20, 0, X86::EDX, Out);
+ }
+ // Return 0.
+ EmitClearReg(false, X86::EAX, Out);
+}
+
+// Does the x86 platform-specific work for longjmp other than normalizing the
+// return parameter (returns of zero are changed to return 1 in the caller).
+// It expects that a pointer to the JMP_BUF is in %ecx/%rdi, and that the
+// return value is in %eax.
+// The JMP_BUF is a structure that has the maximum size over all supported
+// architectures.  The saved registers are restored from the JMP_BUF.
+static void EmitLongjmp(bool Is64Bit, MCStreamer &Out) {
+ unsigned JmpBuf = Is64Bit ? X86::RDI : X86::ECX;
+ // If the return value was 0, make it 1.
+ EmitAndRegReg(false, X86::EAX, X86::EAX, Out);
+ EmitMoveRegImm32(false, X86::EBX, 1, Out);
+ EmitCmove(false, X86::EAX, X86::EBX, Out);
+ if (Is64Bit) {
+ unsigned BasePtr = X86::R15;
+ unsigned Segment = X86::PSEUDO_NACL_SEG;
+ // Restore the registers.
+ EmitLoad(true, X86::RBX, BasePtr, 1, JmpBuf, 0, Segment, Out);
+ EmitLoad(true, X86::RDX, BasePtr, 1, JmpBuf, 8, Segment, Out);
+ // restbp
+ Out.EmitBundleLock();
+ EmitRegTruncate(X86::RBP, Out);
+ EmitRegFix(X86::RBP, Out);
+ Out.EmitBundleUnlock();
+ EmitLoad(true, X86::RDX, BasePtr, 1, JmpBuf, 16, Segment, Out);
+ // restsp
+ Out.EmitBundleLock();
+ EmitRegTruncate(X86::RSP, Out);
+ EmitRegFix(X86::RSP, Out);
+ Out.EmitBundleUnlock();
+ EmitLoad(true, X86::R12, BasePtr, 1, JmpBuf, 24, Segment, Out);
+ EmitLoad(true, X86::R13, BasePtr, 1, JmpBuf, 32, Segment, Out);
+ EmitLoad(true, X86::R14, BasePtr, 1, JmpBuf, 40, Segment, Out);
+ EmitLoad(true, X86::RDX, BasePtr, 1, JmpBuf, 48, Segment, Out);
+ } else {
+ // Restore the registers.
+ EmitLoad(false, X86::EBX, JmpBuf, 1, 0, 0, 0, Out);
+ EmitLoad(false, X86::EBP, JmpBuf, 1, 0, 4, 0, Out);
+ EmitLoad(false, X86::ESP, JmpBuf, 1, 0, 8, 0, Out);
+ EmitLoad(false, X86::ESI, JmpBuf, 1, 0, 12, 0, Out);
+ EmitLoad(false, X86::EDI, JmpBuf, 1, 0, 16, 0, Out);
+ EmitLoad(false, X86::ECX, JmpBuf, 1, 0, 20, 0, Out);
+ }
+ // Jmp to the saved return address.
+ MCInst JMPInst;
+ JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r);
+ JMPInst.addOperand(MCOperand::CreateReg(X86::ECX));
+ Out.EmitInstruction(JMPInst);
+}
+
+namespace llvm {
+// CustomExpandInstNaClX86 -
+// If Inst is a NaCl pseudo instruction, emits the substitute
+// expansion to the MCStreamer and returns true.
+// Otherwise, returns false.
+//
+// NOTE: Each time this function calls Out.EmitInstruction(), it will be
+// called again recursively to rewrite the new instruction being emitted.
+// Care must be taken to ensure that this does not result in an infinite
+// loop. Also, global state must be managed carefully so that it is
+// consistent during recursive calls.
+//
+// We need global state to keep track of the explicit prefix (PREFIX_*)
+// instructions. Unfortunately, the assembly parser prefers to generate
+// these instead of combined instructions. At this time, only one explicit
+// prefix is supported.
+bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) {
+ // If we are emitting to .s, just emit all pseudo-instructions directly.
+ if (Out.hasRawTextSupport()) {
+ return false;
+ }
+ unsigned Opc = Inst.getOpcode();
+ DEBUG(dbgs() << "CustomExpandInstNaClX86("; Inst.dump(); dbgs() << ")\n");
+ switch (Opc) {
+ case X86::LOCK_PREFIX:
+ case X86::REP_PREFIX:
+ case X86::REPNE_PREFIX:
+ case X86::REX64_PREFIX:
+ // Ugly hack because LLVM AsmParser is not smart enough to combine
+ // prefixes back into the instruction they modify.
+ if (PrefixPass) {
+ PrefixPass = false;
+ PrefixSaved = 0;
+ return false;
+ }
+ assert(PrefixSaved == 0);
+ PrefixSaved = Opc;
+ return true;
+ case X86::NACL_TRAP32:
+ assert(PrefixSaved == 0);
+ EmitTrap(false, Out);
+ return true;
+ case X86::NACL_TRAP64:
+ assert(PrefixSaved == 0);
+ EmitTrap(true, Out);
+ return true;
+ case X86::NACL_CALL32d:
+ assert(PrefixSaved == 0);
+ EmitDirectCall(Inst.getOperand(0), false, Out);
+ return true;
+ case X86::NACL_CALL64d:
+ assert(PrefixSaved == 0);
+ EmitDirectCall(Inst.getOperand(0), true, Out);
+ return true;
+ case X86::NACL_CALL32r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), false, true, Out);
+ return true;
+ case X86::NACL_CALL64r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), true, true, Out);
+ return true;
+ case X86::NACL_JMP32r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), false, false, Out);
+ return true;
+ case X86::NACL_TLS_addr32:
+ assert(PrefixSaved == 0);
+ EmitTLSAddr32(Inst, Out);
+ return true;
+ case X86::NACL_JMP64r:
+ assert(PrefixSaved == 0);
+ EmitIndirectBranch(Inst.getOperand(0), true, false, Out);
+ return true;
+ case X86::NACL_RET32:
+ assert(PrefixSaved == 0);
+ EmitRet(NULL, false, Out);
+ return true;
+ case X86::NACL_RET64:
+ assert(PrefixSaved == 0);
+ EmitRet(NULL, true, Out);
+ return true;
+ case X86::NACL_RETI32:
+ assert(PrefixSaved == 0);
+ EmitRet(&Inst.getOperand(0), false, Out);
+ return true;
+ case X86::NACL_ASPi8:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::ADD32ri8, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_ASPi32:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::ADD32ri, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_SSPi8:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::SUB32ri8, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_SSPi32:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::SUB32ri, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_ANDSPi32:
+ assert(PrefixSaved == 0);
+ EmitSPArith(X86::AND32ri, Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_SPADJi32:
+ assert(PrefixSaved == 0);
+ EmitSPAdj(Inst.getOperand(0), Out);
+ return true;
+ case X86::NACL_RESTBPm:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::EBP, true, Out);
+ return true;
+ case X86::NACL_RESTBPr:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::EBP, false, Out);
+ return true;
+ case X86::NACL_RESTSPm:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::ESP, true, Out);
+ return true;
+ case X86::NACL_RESTSPr:
+ assert(PrefixSaved == 0);
+ EmitREST(Inst, X86::ESP, false, Out);
+ return true;
+  // Intrinsics for eliminating platform-specific .s code from the
+  // client-side link. These are recognized in X86InstrNaCl.td.
+ case X86::NACL_SETJ32:
+ EmitSetjmp(false, Out);
+ return true;
+ case X86::NACL_SETJ64:
+ EmitSetjmp(true, Out);
+ return true;
+ case X86::NACL_LONGJ32:
+ EmitLongjmp(false, Out);
+ return true;
+ case X86::NACL_LONGJ64:
+ EmitLongjmp(true, Out);
+ return true;
+ }
+
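+  // Not one of the explicit NaCl pseudos: if the instruction carries a %nacl:
+  // memory reference, truncate its index register and emit it inside a bundle
+  // lock, re-applying any saved prefix first.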
+ unsigned IndexReg;
+ MCInst SandboxedInst = Inst;
+ if (SandboxMemoryRef(&SandboxedInst, &IndexReg, Out)) {
+ unsigned PrefixLocal = PrefixSaved;
+ PrefixSaved = 0;
+
+ Out.EmitBundleLock();
+ EmitRegTruncate(IndexReg, Out);
+ if (PrefixLocal)
+ EmitPrefix(PrefixLocal, Out);
+ Out.EmitInstruction(SandboxedInst);
+ Out.EmitBundleUnlock();
+ return true;
+ }
+
+ if (PrefixSaved) {
+ unsigned PrefixLocal = PrefixSaved;
+ PrefixSaved = 0;
+ EmitPrefix(PrefixLocal, Out);
+ }
+ return false;
+}
+
+} // namespace llvm
+
+
+
+
+// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+//
+// This is an exact copy of getX86SubSuperRegister from X86RegisterInfo.h
+// We cannot use the original because it is part of libLLVMX86CodeGen,
+// which cannot be a dependency of this module (libLLVMX86Desc).
+//
+// However, in all likelihood, the real getX86SubSuperRegister will
+// eventually be moved to MCTargetDesc, and then this copy can be
+// removed.
+
+namespace llvm {
+unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return Reg;
+ case MVT::i8:
+ if (High) {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AH;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DH;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CH;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BH;
+ }
+ } else {
+ switch (Reg) {
+ default: return 0;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AL;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DL;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CL;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BL;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SIL;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DIL;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BPL;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SPL;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8B;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9B;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10B;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11B;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12B;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13B;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14B;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15B;
+ }
+ }
+ case MVT::i16:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::AX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::DX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::CX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::BX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::SI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::DI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::BP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::SP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8W;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9W;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10W;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11W;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12W;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13W;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14W;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15W;
+ }
+ case MVT::i32:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::EAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::EDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::ECX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::EBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::ESI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::EDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::EBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::ESP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8D;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9D;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10D;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11D;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12D;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13D;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14D;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15D;
+ }
+ case MVT::i64:
+ switch (Reg) {
+ default: return Reg;
+ case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
+ return X86::RAX;
+ case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
+ return X86::RDX;
+ case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
+ return X86::RCX;
+ case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
+ return X86::RBX;
+ case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
+ return X86::RSI;
+ case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
+ return X86::RDI;
+ case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
+ return X86::RBP;
+ case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
+ return X86::RSP;
+ case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
+ return X86::R8;
+ case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
+ return X86::R9;
+ case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
+ return X86::R10;
+ case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
+ return X86::R11;
+ case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
+ return X86::R12;
+ case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
+ return X86::R13;
+ case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
+ return X86::R14;
+ case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
+ return X86::R15;
+ }
+ }
+
+ return Reg;
+}
+}
+// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.h b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h
new file mode 100644
index 0000000000..01b400d4d9
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h
@@ -0,0 +1,19 @@
+//===-- X86MCNaCl.h - Prototype for CustomExpandInstNaClX86 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCNACL_H
+#define X86MCNACL_H
+
+namespace llvm {
+ class MCInst;
+ class MCStreamer;
+ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out);
+}
+
+#endif
diff --git a/lib/Target/X86/X86InstrNaCl.td b/lib/Target/X86/X86InstrNaCl.td
new file mode 100644
index 0000000000..ecaabc643b
--- /dev/null
+++ b/lib/Target/X86/X86InstrNaCl.td
@@ -0,0 +1,433 @@
+//====- X86InstrNaCl.td - Describe NaCl Instructions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the modifications to the X86 instruction set needed for
+// Native Client code generation.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// NaCl specific DAG Nodes.
+//
+
+//===----------------------------------------------------------------------===//
+//
+// Native Client Pseudo-Instructions
+//
+// These instructions implement the Native Client pseudo-instructions, such
+// as nacljmp and naclasp.
+//
+// TableGen and MC consider these to be "real" instructions. They can be
+// parsed by the AsmParser and emitted by the AsmStreamer as if they
+// were just regular instructions. They are not marked "Pseudo" because
+// this would imply isCodeGenOnly=1, which would stop them from being
+// parsed by the assembler.
+//
+// These instructions cannot be encoded (written into an object file) by the
+// MCCodeEmitter. Instead, during direct object emission, they get lowered to
+// a sequence of streamer emits (see X86MCNaCl.cpp).
+//
+// These instructions should not be used in CodeGen. They have no pattern
+// and lack CodeGen metadata. Instead, the X86NaClRewritePass should
+// generate these instructions after CodeGen is finished.
+//
+//===----------------------------------------------------------------------===//
+
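+// For example, an indirect call written in assembly as "naclcall %ecx" parses
+// to NACL_CALL32r and is expanded during direct object emission (see
+// CustomExpandInstNaClX86 in X86MCNaCl.cpp) into a bundle-locked
+// "andl $-32, %ecx; call *%ecx" sequence (using the default jump mask).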
+
+//===----------------------------------------------------------------------===//
+// 32-bit Native Client Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class NaClPI32<dag outs, dag ins, string asm>
+ : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In32BitMode]>;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in {
+ def NACL_TRAP32 : NaClPI32<(outs), (ins), "nacltrap">;
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in {
+ def NACL_RET32 : NaClPI32<(outs), (ins), "naclret">;
+ def NACL_RETI32 : NaClPI32<(outs), (ins i16imm:$amt), "naclreti\t$amt">;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ isAsmParserOnly = 1 in {
+ def NACL_JMP32r : NaClPI32<(outs), (ins GR32:$dst), "nacljmp\t$dst">;
+}
+
+let isCall = 1, isAsmParserOnly = 1 in {
+ def NACL_CALL32d : NaClPI32<(outs), (ins i32imm_pcrel:$dst),
+ "naclcall\t$dst">;
+ def NACL_CALL32r : NaClPI32<(outs), (ins GR32:$dst),
+ "naclcall\t$dst">;
+}
+
+// nacltlsaddr32 gets rewritten to:
+// .bundle_align_end
+// .bundle_lock
+// leal\t$sym@TLSGD, %eax
+// call\t___tls_get_addr@PLT
+// .bundle_unlock
+// (The linker expects the leal+call sequence to be directly adjacent)
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP],
+ isAsmParserOnly = 1 in
+def NACL_TLS_addr32 : NaClPI32<(outs), (ins i32mem:$sym),
+ "nacltlsaddr32\t$sym">;
+
+//===----------------------------------------------------------------------===//
+// 64-bit Native Client Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+class NaClPI64<dag outs, dag ins, string asm>
+ : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In64BitMode]>;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in {
+ def NACL_TRAP64 : NaClPI64<(outs), (ins), "nacltrap">;
+}
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in {
+ def NACL_RET64 : NaClPI64<(outs), (ins), "naclret">;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ isAsmParserOnly = 1 in {
+ def NACL_JMP64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP),
+ "nacljmp\t{$dst, $rZP|$rZP, $dst}">;
+}
+
+
+let isCall = 1, isAsmParserOnly = 1 in {
+ def NACL_CALL64d : NaClPI64<(outs), (ins i32imm_pcrel:$dst),
+ "naclcall\t$dst">;
+ def NACL_CALL64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP),
+ "naclcall\t$dst,$rZP">;
+}
+
+let Defs = [RSP, EFLAGS], Uses = [RSP], isAsmParserOnly = 1 in {
+ def NACL_ASPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP),
+ "naclasp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_ASPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclasp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_SSPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP),
+ "naclssp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_SSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclssp{q}\t{$off, $rZP|$rZP, $off}">;
+
+ def NACL_ANDSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclandsp{q}\t{$off, $rZP|$rZP, $off}">;
+}
+
+let Defs = [RSP], Uses = [RBP], isAsmParserOnly = 1 in {
+ def NACL_SPADJi32 : NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP),
+ "naclspadj\t{$off, $rZP|$rZP, $off}">;
+}
+
+let Defs = [RSP], isAsmParserOnly = 1 in {
+ def NACL_RESTSPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP),
+ "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">;
+ def NACL_RESTSPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP),
+ "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">;
+}
+
+def : MnemonicAlias<"naclrestsp", "naclrestsp_noflags">;
+
+let Defs = [RBP], isAsmParserOnly = 1 in {
+ def NACL_RESTBPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP),
+ "naclrestbp\t{$src, $rZP|$rZP, $src}">;
+ def NACL_RESTBPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP),
+ "naclrestbp\t{$src, $rZP|$rZP, $src}">;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Code Generator Instructions (isCodeGenOnly == 1)
+//
+// These instructions exist to make CodeGen work with Native Client's
+// modifications.
+//
+// Many of these instructions exist because of limitations in CodeGen
+// or TableGen, and may become unnecessary in the future.
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+//
+// CodeGen 32-bit
+//
+//===----------------------------------------------------------------------===//
+
+
+// To avoid a naming conflict between call/naclcall, we have to
+// disable the real CALLpcrel32 and CALL32r instructions when targeting
+// NaCl. Thus, they need to be produced here.
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. ESP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in {
+
+ def NACL_CG_CALLpcrel32 : I<0, Pseudo,
+ (outs), (ins i32imm_pcrel:$dst, variable_ops),
+ "naclcall\t$dst", []>,
+ Requires<[IsNaCl, In32BitMode]>;
+ def NACL_CG_CALL32r : I<0, Pseudo,
+ (outs), (ins GR32:$dst, variable_ops),
+ "naclcall\t$dst", [(X86call GR32:$dst)]>,
+ Requires<[IsNaCl, In32BitMode]>;
+}
+
+// Normal calls, with various flavors of addresses.
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (NACL_CG_CALLpcrel32 tglobaladdr:$dst)>,
+ Requires<[IsNaCl, In32BitMode]>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (NACL_CG_CALLpcrel32 texternalsym:$dst)>,
+ Requires<[IsNaCl, In32BitMode]>;
+def : Pat<(X86call (i32 imm:$dst)),
+ (NACL_CG_CALLpcrel32 imm:$dst)>,
+ Requires<[IsNaCl, In32BitMode, CallImmAddr]>;
+
+//===----------------------------------------------------------------------===//
+//
+// CodeGen 64-bit
+//
+//===----------------------------------------------------------------------===//
+
+
+// Because pointers are 32-bit on X86-64 Native Client, we need to
+// produce new versions of the JMP64/CALL64 instructions which can accept
+// addresses which are i32 instead of i64.
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def NACL_CG_JMP64r : I<0, Pseudo, (outs), (ins GR32:$dst, variable_ops),
+ "nacljmp\t$dst",
+ [(brind GR32:$dst)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+let isCall = 1 in
+ // All calls clobber the non-callee saved registers. RSP is marked as
+ // a use to prevent stack-pointer assignments that appear immediately
+ // before calls from potentially appearing dead. Uses for argument
+ // registers are added manually.
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ def NACL_CG_CALL64pcrel32 : I<0, Pseudo, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "naclcall\t$dst", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+ def NACL_CG_CALL64r : I<0, Pseudo, (outs), (ins GR32:$dst, variable_ops),
+ "naclcall\t$dst,%r15",
+ [(X86call GR32:$dst)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+def : Pat<(X86call (i32 tglobaladdr:$dst)),
+ (NACL_CG_CALL64pcrel32 tglobaladdr:$dst)>,
+ Requires<[IsNaCl, In64BitMode]>;
+def : Pat<(X86call (i32 texternalsym:$dst)),
+ (NACL_CG_CALL64pcrel32 texternalsym:$dst)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+// Tail calls
+// Also needed due to the i64 / i32 pointer problem.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
+ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+
+ def NACL_CG_TCRETURNdi64 : I<0, Pseudo, (outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+ def NACL_CG_TCRETURNri64 : I<0, Pseudo, (outs),
+ (ins GR32_TC_64:$dst, i32imm:$offset,
+ variable_ops),
+ "#TC_RETURN $dst $offset", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+ def NACL_CG_TAILJMPd64 : I<0, Pseudo, (outs),
+ (ins i32imm_pcrel:$dst, variable_ops),
+ "jmp\t$dst # TAILCALL", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+ def NACL_CG_TAILJMPr64 : I<0, Pseudo, (outs),
+ (ins GR32_TC_64:$dst, variable_ops),
+ "nacljmp\t$dst,%r15 # TAILCALL", []>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
+ (NACL_CG_TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
+ (NACL_CG_TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+def : Pat<(X86tcret GR32_TC_64:$dst, imm:$off),
+ (NACL_CG_TCRETURNri64 GR32_TC_64:$dst, imm:$off)>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+// ELF TLS Support
+
+let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [ESP] in
+def NACL_CG_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ ".bundle_align_end"
+ ".bundle_lock"
+ "leal\t$sym, %eax; "
+ "call\t___tls_get_addr@PLT"
+ ".bundle_unlock",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[In32BitMode, IsNaCl]>;
+
+// These are lowered in X86NaClRewritePass.
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+ MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+ Uses = [RSP] in {
+def NACL_CG_GD_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr tls32addr:$sym)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+def NACL_CG_LE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_le tls32addr:$sym)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+def NACL_CG_IE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_ie tls32addr:$sym)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+// For mtls-use-call.
+def NACL_CG_LE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_le tls32addr:$sym)]>,
+ Requires<[IsNaCl, In32BitMode]>;
+def NACL_CG_IE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "",
+ [(X86tlsaddr_ie tls32addr:$sym)]>,
+ Requires<[IsNaCl, In32BitMode]>;
+}
+
+let usesCustomInserter = 1, Defs = [EFLAGS] in
+def NACL_CG_VAARG_64 : I<0, Pseudo,
+ (outs GR32:$dst),
+ (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
+ "#NACL_VAARG_64 $dst, $ap, $size, $mode, $align",
+ [(set GR32:$dst,
+ (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)),
+ (implicit EFLAGS)]>,
+ Requires<[IsNaCl, In64BitMode]>;
+
+//===----------------------------------------------------------------------===//
+// NativeClient intrinsics
+// These provide the ability to implement several low-level features without
+// having to link native ASM code on the client.
+// These need to be kept in sync with lib/Target/ARM/ARMInstrInfo.td and
+// lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp.
+// TODO(sehr): Separate this code to allow NaCl and non-NaCl versions.
+
+// Saves all the callee-saved registers, [er]sp, and [er]ip to the JMP_BUF
+// structure pointed to by 4(%esp) or %rdi. The JMP_BUF structure is the
+// maximum size over all supported architectures. The MC expansions happen
+// in X86MCNaCl.cpp.
+let Uses = [ECX, RDX], Defs = [EAX, EFLAGS] in {
+ def NACL_SETJ32 : I<0, Pseudo, (outs), (ins),
+ "movl %ebx, 0(%ecx); "
+ "movl %ebp, 4(%ecx); "
+ "movl %esp, 8(%ecx); "
+ "movl %esi, 12(%ecx); "
+ "movl %edi, 16(%ecx); "
+ "movl %edx, 20(%ecx); "
+ "xorl %eax, %eax; ",
+ [(set EAX, (int_nacl_setjmp ECX, EDX))]>,
+ Requires<[IsNaCl, In32BitMode]>;
+}
+let Uses = [EDI, RDX], Defs = [EAX, EFLAGS] in {
+ def NACL_SETJ64 : I<0, Pseudo, (outs), (ins),
+ "movq %rbx, %nacl:0(%r15, %rdi); "
+ "movq %rbp, %nacl:8(%r15, %rdi); "
+ "movq %rsp, %nacl:16(%r15, %rdi); "
+ "movq %r12, %nacl:24(%r15, %rdi); "
+ "movq %r13, %nacl:32(%r15, %rdi); "
+ "movq %r14, %nacl:40(%r15, %rdi); "
+ "movq %rdx, %nacl:48(%r15, %rdi); "
+ "xorl %eax, %eax; ",
+ [(set EAX, (int_nacl_setjmp EDI, EDX))]>,
+ Requires<[IsNaCl, In64BitMode]>;
+}
+
+// Restores all the callee-saved registers, [er]sp, and [er]ip from the JMP_BUF
+// structure pointed to by 4(%esp) or %rdi. Returns the value in 8(%esp) or
+// %rsi at entry. This implements the tail of longjmp, with the normalization
+// of the return value (if the caller passes zero to longjmp, it should return
+// 1) done in the caller. The MC expansions happen in X86MCNaCl.cpp.
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ Uses = [EAX, ECX] in {
+ def NACL_LONGJ32 : I<0, Pseudo, (outs), (ins),
+ "movl $$1, %ebx; "
+ "andl %eax, %eax; "
+ "cmovzl %ebx, %eax; "
+ "movl 0(%ecx), %ebx; "
+ "movl 4(%ecx), %ebp; "
+ "movl 8(%ecx), %esp; "
+ "movl 12(%ecx), %esi; "
+ "movl 16(%ecx), %edi; "
+ "movl 20(%ecx), %ecx; "
+ "nacljmp %ecx; ",
+ [(int_nacl_longjmp ECX, EAX)]>,
+ Requires<[IsNaCl, In32BitMode]>, TB;
+}
+
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1,
+ Uses = [EAX, EDI, R15] in {
+ def NACL_LONGJ64 : I<0, Pseudo, (outs), (ins),
+ "movl $$1, %ebx; "
+ "andl %eax, %eax; "
+ "cmovzl %ebx, %eax; "
+ "movq %nacl:0(%r15, %edi), %rbx; "
+ "movq %nacl:8(%r15, %edi), %rdx; "
+ "naclrestbp %edx, %r15; "
+ "movq %nacl:16(%r15, %edi), %rdx; "
+ "naclrestsp %edx, %r15; "
+ "movq %nacl:24(%r15, %edi), %r12; "
+ "movq %nacl:32(%r15, %edi), %r13; "
+ "movq %nacl:40(%r15, %edi), %r14; "
+ "movq %nacl:48(%r15, %edi), %rcx; "
+ "nacljmp %ecx, %r15; ",
+ [(int_nacl_longjmp EDI, EAX)]>,
+ Requires<[IsNaCl, In64BitMode]>, TB;
+}
diff --git a/lib/Target/X86/X86NaClJITInfo.cpp b/lib/Target/X86/X86NaClJITInfo.cpp
new file mode 100644
index 0000000000..e5ccbf960d
--- /dev/null
+++ b/lib/Target/X86/X86NaClJITInfo.cpp
@@ -0,0 +1,393 @@
+//===-- X86JITInfo.cpp - Implement the JIT interfaces for the X86 target --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the X86 target on Native Client
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "X86NaClJITInfo.h"
+#include "X86Relocations.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include <cstdlib>
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Valgrind.h"
+#ifdef __native_client__
+#include <nacl/nacl_dyncode.h>
+#endif
+
+using namespace llvm;
+
+extern cl::opt<int> FlagSfiX86JmpMask;
+
+// Determine the platform we're running on
+#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
+# define X86_64_JIT
+#elif defined(__i386__) || defined(i386) || defined(_M_IX86)
+# define X86_32_JIT
+#elif defined(__pnacl__)
+#warning "PNaCl does not yet have JIT support"
+#else
+#error "Should not be building X86NaClJITInfo on non-x86"
+// TODO(dschuff): make this work under pnacl self-build?
+#endif
+
+// Get the ASMPREFIX for the current host. This is often '_'.
+#ifndef __USER_LABEL_PREFIX__
+#define __USER_LABEL_PREFIX__
+#endif
+#define GETASMPREFIX2(X) #X
+#define GETASMPREFIX(X) GETASMPREFIX2(X)
+#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
+
+# define SIZE(sym) ".size " #sym ", . - " #sym "\n"
+# define TYPE_FUNCTION(sym) ".type " #sym ", @function\n"
+
+void X86NaClJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ // We don't know the original instruction boundaries, so we replace the
+ // whole bundle.
+ uint8_t buf[kBundleSize];
+ buf[0] = 0xE9; // Emit JMP opcode.
+ intptr_t OldAddr = ((uintptr_t)Old + 1);
+  uint32_t NewOffset = (intptr_t)New - OldAddr - 4; // PC-relative offset to New
+ *((uint32_t*)(buf + 1)) = NewOffset;
+ memcpy(buf + 5, getNopSequence(kBundleSize - 5), kBundleSize - 5);
+
+#ifdef __native_client__
+ if(nacl_dyncode_create(Old, buf, kBundleSize)) {
+ report_fatal_error("machine code replacement failed");
+ }
+#endif
+
+ // X86 doesn't need to invalidate the processor cache, so just invalidate
+ // Valgrind's cache directly.
+ sys::ValgrindDiscardTranslations(Old, 5);
+}
+
+/// JITCompilerFunction - This contains the address of the JIT function used to
+/// compile a function lazily.
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+extern "C" {
+#if defined(X86_64_JIT) || defined(__pnacl__) || !defined(__native_client__)
+void X86NaClCompilationCallback(void) {
+//TODO(dschuff): implement for X86-64
+}
+void X86NaClCompilationCallback_fastcc(void) {
+//TODO(dschuff): implement for X86-64
+}
+#else
+// Chrome's system requirements include a PIII, so SSE is present.
+// For now this is the same as X86CompilationCallback_SSE.
+// In the future we could emit this rather than defining it with asm, for
+// compatibility with the pnacl self-build.
+// We also omit the CFI junk (which is #defined away).
+
+// The difference between the two wrapper variants is that the first returns
+// through ecx and the second returns through eax. The fastcc calling
+// convention uses ecx to pass arguments, and the C calling convention uses
+// eax to pass arguments with the 'inreg' attribute, so we make sure not to
+// clobber them. Returning through eax for fastcc and ecx for C would clobber
+// the 'nest' parameter, breaking nested functions (which are not supported by
+// clang in any case).
+
+void X86NaClCompilationCallback(void);
+asm(
+ ".text\n"
+ ".align 32\n"
+ ".globl " ASMPREFIX "X86NaClCompilationCallback\n"
+ TYPE_FUNCTION(X86NaClCompilationCallback)
+ ASMPREFIX "X86NaClCompilationCallback:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86NaClCompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "popl %ecx\n"
+ "nacljmp %ecx\n"
+ SIZE(X86NaClCompilationCallback)
+);
+
+
+
+void X86NaClCompilationCallback_fastcc(void);
+asm(
+ ".text\n"
+ ".align 32\n"
+ ".globl " ASMPREFIX "X86NaClCompilationCallback_fastcc\n"
+ TYPE_FUNCTION(X86NaClCompilationCallback_fastcc)
+ ASMPREFIX "X86NaClCompilationCallback_fastcc:\n"
+ "pushl %ebp\n"
+ "movl %esp, %ebp\n" // Standard prologue
+ "pushl %eax\n"
+ "pushl %edx\n" // Save EAX/EDX/ECX
+ "pushl %ecx\n"
+ "andl $-16, %esp\n" // Align ESP on 16-byte boundary
+ // Save all XMM arg registers
+ "subl $64, %esp\n"
+ // FIXME: provide frame move information for xmm registers.
+ // This can be tricky, because CFA register is ebp (unaligned)
+ // and we need to produce offsets relative to it.
+ "movaps %xmm0, (%esp)\n"
+ "movaps %xmm1, 16(%esp)\n"
+ "movaps %xmm2, 32(%esp)\n"
+ "movaps %xmm3, 48(%esp)\n"
+ "subl $16, %esp\n"
+ "movl 4(%ebp), %eax\n" // Pass prev frame and return address
+ "movl %eax, 4(%esp)\n"
+ "movl %ebp, (%esp)\n"
+ "call " ASMPREFIX "X86NaClCompilationCallback2\n"
+ "addl $16, %esp\n"
+ "movaps 48(%esp), %xmm3\n"
+ "movaps 32(%esp), %xmm2\n"
+ "movaps 16(%esp), %xmm1\n"
+ "movaps (%esp), %xmm0\n"
+ "movl %ebp, %esp\n" // Restore ESP
+ "subl $12, %esp\n"
+ "popl %ecx\n"
+ "popl %edx\n"
+ "popl %eax\n"
+ "popl %ebp\n"
+ "popl %eax\n"
+ "nacljmp %eax\n"
+ SIZE(X86NaClCompilationCallback_fastcc)
+);
+#endif
+
+/// X86NaClCompilationCallback2 - This is the target-specific function invoked
+/// by the function stub when we did not know the real target of a call. This
+/// function must locate the start of the stub or call site and pass it into
+/// the JIT compiler function.
+
+// A stub has the following format:
+// | Jump opcode (1 byte) | Jump target +22 bytes | 3 bytes of NOPs
+// | 18 bytes of NOPs | 1 halt | Call opcode (1 byte) | call target
+// The jump targets the call at the end of the bundle, which targets the
+// compilation callback. Once the compilation callback JITed the target
+// function it replaces the first 8 bytes of the stub in a single atomic
+// operation, retargeting the jump at the JITed function.
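+//
+// Byte layout of the 32-byte stub (a sketch; see emitFunctionStub below):
+//   offset  0: E9 16 00 00 00     jmp +22   (targets the call at offset 27)
+//   offset  5: 90 x 21            nops      (instruction boundary at byte 8,
+//                                            since the first 8 bytes are what
+//                                            the callback later rewrites)
+//   offset 26: F4                 hlt       (marker tested by isStub below)
+//   offset 27: E8 <rel32>         call to the compilation callback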
+
+static uint8_t *BundleRewriteBuffer;
+
+static void LLVM_ATTRIBUTE_USED
+X86NaClCompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
+ // Get the return address from where the call instruction left it
+ intptr_t *RetAddrLoc = &StackPtr[1];
+ assert(*RetAddrLoc == RetAddr &&
+ "Could not find return address on the stack!");
+
+  // TODO: take a lock here. Figure out whether it has to be the JIT lock or
+  // whether it can be our own lock (or however we handle thread safety).
+#if 0
+ DEBUG(dbgs() << "In callback! Addr=" << (void*)RetAddr
+ << " ESP=" << (void*)StackPtr << "\n");
+#endif
+
+ intptr_t StubStart = RetAddr - 32;
+ // This probably isn't necessary. I believe the corresponding code in
+ // X86JITInfo is vestigial, and AFAICT no non-stub calls to the compilation
+  // callback are generated anywhere. Still, it doesn't hurt as a sanity check.
+ bool isStub = *((unsigned char*)StubStart) == 0xE9 &&
+ *((int32_t*)(StubStart + 1)) == 22 &&
+ *((unsigned char*)(StubStart + 26)) == 0xF4;
+
+ assert(isStub && "NaCl doesn't support rewriting non-stub callsites yet");
+
+ // Backtrack so RetAddr points inside the stub (so JITResolver can find
+ // which function to compile)
+ RetAddr -= 4;
+
+ intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)RetAddr);
+
+ // Rewrite the stub's call target, so that we don't end up here every time we
+ // execute the call.
+
+ // Get the first 8 bytes of the stub
+ memcpy(BundleRewriteBuffer, (void *)(StubStart), 8);
+ // Point the jump at the newly-JITed code
+ *((intptr_t *)(BundleRewriteBuffer + 1)) = NewVal - (StubStart + 5);
+
+ // Copy the new code
+#ifdef __native_client__
+ if(nacl_dyncode_modify((void *)StubStart, BundleRewriteBuffer, 8)) {
+ report_fatal_error("dyncode_modify failed");
+ }
+#endif
+ // TODO: release the lock
+
+ // Change our return address to execute the new jump
+ *RetAddrLoc = StubStart;
+}
+
+}
+
+const int X86NaClJITInfo::kBundleSize;
+
+TargetJITInfo::LazyResolverFn
+X86NaClJITInfo::getLazyResolverFunction(JITCompilerFn F) {
+ JITCompilerFunction = F;
+ return X86NaClCompilationCallback;
+}
+
+X86NaClJITInfo::X86NaClJITInfo(X86TargetMachine &tm) : X86JITInfo(tm) {
+ // FIXME: does LLVM have some way of doing static initialization?
+#ifndef __pnacl__
+ if(posix_memalign((void **)&BundleRewriteBuffer, kBundleSize, kBundleSize))
+ report_fatal_error("Could not allocate aligned memory");
+#else
+ BundleRewriteBuffer = NULL;
+#endif
+
+ NopString = new uint8_t[kBundleSize];
+ for (int i = 0; i < kBundleSize; i++) NopString[i] = 0x90;
+ X86Hlt.ins = new uint8_t[1];
+ X86Hlt.ins[0] = 0xf4;
+ X86Hlt.len = 1;
+}
+
+X86NaClJITInfo::~X86NaClJITInfo() {
+ delete [] NopString;
+ delete [] X86Hlt.ins;
+}
+
+TargetJITInfo::StubLayout X86NaClJITInfo::getStubLayout() {
+ // NaCl stubs must be full bundles because calls still have to be aligned
+ // even if they don't return
+ StubLayout Result = {kBundleSize, kBundleSize};
+ return Result;
+}
+
+
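+// Emits a 32-byte (one full bundle) stub.  Stubs that target the lazy
+// compilation callback use the jmp/nops/hlt/call layout described above so
+// that X86NaClCompilationCallback2 can recognize and later rewrite them;
+// stubs with a known target are simply a direct jump padded with nops.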
+void *X86NaClJITInfo::emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE) {
+ bool TargetsCC = Target == (void *)(intptr_t)X86NaClCompilationCallback;
+
+ // If we target the compilation callback, swap it for a different one for
+ // functions using the fastcc calling convention
+ if(TargetsCC && F->getCallingConv() == CallingConv::Fast) {
+ Target = (void *)(intptr_t)X86NaClCompilationCallback_fastcc;
+ }
+
+ void *Result = (void *)JCE.getCurrentPCValue();
+ assert(RoundUpToAlignment((uintptr_t)Result, kBundleSize) == (uintptr_t)Result
+ && "Unaligned function stub");
+ if (!TargetsCC) {
+ // Jump to the target
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+ // Fill with Nops.
+ emitNopPadding(JCE, 27);
+ } else {
+ // Jump over 22 bytes
+ JCE.emitByte(0xE9);
+ JCE.emitWordLE(22);
+    // emit 3 bytes of nops to ensure an instruction boundary at 8 bytes
+ emitNopPadding(JCE, 3);
+ // emit 18 bytes of nop
+ emitNopPadding(JCE, 18);
+ // emit 1 byte of halt. This helps CompilationCallback tell whether
+ // we came from a stub or not
+ JCE.emitByte(X86Hlt.ins[0]);
+ // emit a call to the compilation callback
+ JCE.emitByte(0xE8);
+ JCE.emitWordLE((intptr_t)Target - JCE.getCurrentPCValue() - 4);
+ }
+ return Result;
+}
+
+// Relocations are the same as on X86, but the address being written is not
+// the same as the address that the offset is relative to (see the comment on
+// setRelocationBuffer in X86NaClJITInfo.h).
+void X86NaClJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ void *RelocPos = RelocationBuffer + MR->getMachineCodeOffset();
+ void *RelocTargetPos = (char*)Function + MR->getMachineCodeOffset();
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((X86::RelocationType)MR->getRelocationType()) {
+ case X86::reloc_pcrel_word: {
+ // PC relative relocation, add the relocated value to the value already in
+ // memory, after we adjust it for where the PC is.
+ ResultPtr = ResultPtr -(intptr_t)RelocTargetPos - 4 - MR->getConstantVal();
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_picrel_word: {
+ // PIC base relative relocation, add the relocated value to the value
+ // already in memory, after we adjust it for where the PIC base is.
+ ResultPtr = ResultPtr - ((intptr_t)Function + MR->getConstantVal());
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ }
+ case X86::reloc_absolute_word:
+ case X86::reloc_absolute_word_sext:
+ // Absolute relocation, just add the relocated value to the value already
+ // in memory.
+ *((unsigned*)RelocPos) += (unsigned)ResultPtr;
+ break;
+ case X86::reloc_absolute_dword:
+ *((intptr_t*)RelocPos) += ResultPtr;
+ break;
+ }
+ }
+}
+
+const uint8_t *X86NaClJITInfo::getNopSequence(size_t len) const {
+ // TODO(dschuff): use more efficient NOPs.
+ // Update emitNopPadding when it happens
+ assert((int)len <= kBundleSize &&
+ "Nop sequence can't be more than bundle size");
+ return NopString;
+}
+
+void X86NaClJITInfo::emitNopPadding(JITCodeEmitter &JCE, size_t len) {
+ for (size_t i = 0; i < len; i++) JCE.emitByte(NopString[i]);
+}
+
+const TargetJITInfo::HaltInstruction *X86NaClJITInfo::getHalt() const {
+ return &X86Hlt;
+}
+
+int X86NaClJITInfo::getBundleSize() const {
+ return kBundleSize;
+}
+
+int32_t X86NaClJITInfo::getJumpMask() const {
+ return FlagSfiX86JmpMask;
+}
diff --git a/lib/Target/X86/X86NaClJITInfo.h b/lib/Target/X86/X86NaClJITInfo.h
new file mode 100644
index 0000000000..9416efeff1
--- /dev/null
+++ b/lib/Target/X86/X86NaClJITInfo.h
@@ -0,0 +1,75 @@
+//=- X86NaClJITInfo.h - X86 implementation of the JIT interface --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the X86 implementation of the TargetJITInfo class for
+// Native Client
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86NACLJITINFO_H
+#define X86NACLJITINFO_H
+
+#include "X86JITInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+ class X86NaClJITInfo : public X86JITInfo {
+ void emitNopPadding(JITCodeEmitter &JCE, size_t len);
+ const X86Subtarget *Subtarget;
+ uintptr_t PICBase;
+ uint8_t *NopString;
+ HaltInstruction X86Hlt;
+ uint8_t *RelocationBuffer;
+ public:
+ static const int kBundleSize = 32;
+ explicit X86NaClJITInfo(X86TargetMachine &tm);
+ virtual ~X86NaClJITInfo();
+
+ virtual void replaceMachineCodeForFunction(void *Old, void *New);
+
+ // getStubLayout - Returns the size and alignment of the largest call stub
+ // on X86 NaCl.
+ virtual StubLayout getStubLayout();
+
+ // Note: the emission functions MUST NOT touch the target memory
+ virtual void *emitFunctionStub(const Function* F, void *Target,
+ JITCodeEmitter &JCE);
+ /// getLazyResolverFunction - Expose the lazy resolver to the JIT.
+ virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ /// relocate - Before the JIT can run a block of code that has been emitted,
+ /// it must rewrite the code to contain the actual addresses of any
+ /// referenced global symbols.
+ virtual void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase);
+
+ virtual char* allocateThreadLocalMemory(size_t size) {
+ //TODO(dschuff) Implement TLS or decide whether X86 TLS works
+ assert(0 && "This target does not implement thread local storage!");
+ return 0;
+ }
+ /// Return a string containing a sequence of NOPs which is valid for
+ /// the given length
+ virtual const uint8_t *getNopSequence(size_t len) const;
+ virtual const HaltInstruction *getHalt() const;
+ virtual int getBundleSize() const;
+ virtual int getJumpMask() const;
+ /// Relocations cannot happen in-place in NaCl because we can't write to
+ /// code. This function takes a pointer to where the code has been emitted,
+ /// before it is copied to the code region. The subsequent call to
+ /// relocate takes pointers to the target code location, but rewrites the
+ /// code in the relocation buffer rather than at the target
+ virtual void setRelocationBuffer(unsigned char * BufferBegin) {
+ RelocationBuffer = BufferBegin;
+ }
+ };
+}
+
+#endif
diff --git a/lib/Target/X86/X86NaClRewriteFinalPass.cpp b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
new file mode 100644
index 0000000000..93728ddb08
--- /dev/null
+++ b/lib/Target/X86/X86NaClRewriteFinalPass.cpp
@@ -0,0 +1,236 @@
+//=== X86NaClRewriteFinalPass.cpp - Expand NaCl pseudo-instructions --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass expands NaCl pseudo-instructions into real instructions.
+// This duplicates much of the functionality found in X86MCNaCl.cpp but is
+// needed for non-MC JIT, which doesn't use MC. It expands pseudo instructions
+// into bundle-locked groups by emitting a BUNDLE_LOCK marker,
+// followed by the instructions, followed by a BUNDLE_UNLOCK marker.
+// The Code Emitter needs to ensure the alignment as it emits. Additionally,
+// this pass needs to be run last, or the user at least needs to ensure that
+// subsequent passes do not reorder or remove any bundled groups.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "x86-jit-sandboxing"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Function.h"
+
+using namespace llvm;
+
+extern cl::opt<int> FlagSfiX86JmpMask;
+
+namespace {
+ class X86NaClRewriteFinalPass : public MachineFunctionPass {
+ public:
+ static char ID;
+ X86NaClRewriteFinalPass() : MachineFunctionPass(ID),
+ kJumpMask(FlagSfiX86JmpMask) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NaCl Pseudo-instruction expansion";
+ }
+
+ private:
+ const int kJumpMask;
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ bool Is64Bit;
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ void TraceLog(const char *fun,
+ const MachineBasicBlock &MBB,
+ const MachineBasicBlock::iterator MBBI) const;
+
+ void RewriteIndirectJump(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit,
+ bool IsCall);
+ void RewriteDirectCall(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit);
+ bool ApplyCommonRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ };
+
+ char X86NaClRewriteFinalPass::ID = 0;
+}
+
+void X86NaClRewriteFinalPass::RewriteIndirectJump(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit,
+ bool IsCall) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "rewrite indirect jump " << MBB);
+
+ unsigned reg32 = MI.getOperand(0).getReg();
+ unsigned reg64 = getX86SubSuperRegister(reg32, MVT::i64);
+
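+ // The bundle-locked sequence built below is, for the 64-bit case, roughly:
+ //   andl $kJumpMask, %reg32
+ //   addq %r15, %reg64
+ //   call/jmp *%reg64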
+ if (IsCall)
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_ALIGN_END));
+
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_LOCK));
+
+ BuildMI(MBB, MBBI, DL, TII->get(X86::AND32ri8))
+ .addReg(reg32)
+ .addReg(reg32)
+ //.addOperand(MI.getOperand(0))//correct flags, but might be 64bit reg
+ .addImm(kJumpMask);
+
+ if (Is64Bit) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::ADD64rr))
+ .addReg(reg64)
+ .addReg(reg64)
+ .addReg(X86::R15);
+ }
+
+ if (IsCall) {
+ BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::CALL64r : X86::CALL32r))
+ .addReg(Is64Bit ? reg64 : reg32);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::JMP64r : X86::JMP32r))
+ .addReg(Is64Bit ? reg64 : reg32);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_UNLOCK));
+ MI.eraseFromParent();
+
+ DEBUG(dbgs() << "done rewrite indirect jump " << MBB);
+}
+
+void X86NaClRewriteFinalPass::RewriteDirectCall(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ bool Is64Bit) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ DEBUG(dbgs() << "rewrite direct call " << MBB);
+ const MachineOperand &MO = MI.getOperand(0);
+ // rewrite calls to immediates as indirect calls.
+ if (MO.isImm()) {
+ DEBUG(dbgs() << " is immediate " << MO);
+ // First, rewrite as a move imm->reg + indirect call sequence,
+ BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32ri))
+ .addReg(X86::ECX)
+ .addOperand(MO);
+ BuildMI(MBB, MBBI, DL, TII->get(Is64Bit ? X86::CALL64r : X86::CALL32r))
+ .addReg(X86::ECX);
+ // Then use RewriteIndirectJump to sandbox it
+ MachineBasicBlock::iterator I = MBBI;
+ --I; // I now points at the call instruction
+ MI.eraseFromParent();
+ return RewriteIndirectJump(MBB, I, Is64Bit, true);
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::BUNDLE_ALIGN_END));
+
+ BuildMI(MBB, MBBI, DL,
+ TII->get(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32))
+ .addOperand(MI.getOperand(0));
+
+ MI.eraseFromParent();
+}
+
+bool X86NaClRewriteFinalPass::ApplyCommonRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned Opcode = MI.getOpcode();
+ switch(Opcode) {
+ case X86::NACL_CALL32d:
+ RewriteDirectCall(MBB, MBBI, false);
+ break;
+ case X86::NACL_CALL64d:
+ RewriteDirectCall(MBB, MBBI, true);
+ break;
+ case X86::NACL_CALL32r:
+ RewriteIndirectJump(MBB, MBBI, false, true);
+ return true;
+ case X86::NACL_CALL64r:
+ RewriteIndirectJump(MBB, MBBI, true, true);
+ return true;
+ case X86::NACL_JMP32r:
+ RewriteIndirectJump(MBB, MBBI, false, false);
+ return true;
+ case X86::NACL_JMP64r:
+ RewriteIndirectJump(MBB, MBBI, true, false);
+ return true;
+ case X86::NACL_TRAP32:
+ case X86::NACL_TRAP64:
+ case X86::NACL_ASPi8:
+ case X86::NACL_ASPi32:
+ case X86::NACL_SSPi8:
+ case X86::NACL_SSPi32:
+ case X86::NACL_SPADJi32:
+ case X86::NACL_RESTBPm:
+ case X86::NACL_RESTBPr:
+ case X86::NACL_RESTSPm:
+ case X86::NACL_RESTSPr:
+ case X86::NACL_SETJ32:
+ case X86::NACL_SETJ64:
+ case X86::NACL_LONGJ32:
+ case X86::NACL_LONGJ64:
+ dbgs() << "inst, opcode not handled: " << MI << Opcode;
+ assert(false && "NaCl Pseudo-inst not handled");
+ case X86::NACL_RET32:
+ case X86::NACL_RET64:
+ case X86::NACL_RETI32:
+ assert(false && "Should not get RETs here");
+ }
+ return false;
+}
+
+bool X86NaClRewriteFinalPass::runOnMachineFunction(MachineFunction &MF) {
+ bool modified = false;
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ const X86Subtarget *subtarget = &TM->getSubtarget<X86Subtarget>();
+ assert(subtarget->isTargetNaCl() && "Target in NaClRewriteFinal is not NaCl");
+
+ DEBUG(dbgs() << "*************** NaCl Rewrite Final ***************\n");
+ DEBUG(dbgs() << " funcnum " << MF.getFunctionNumber() << " "
+ << MF.getFunction()->getName() << "\n");
+
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E; ++MFI) {
+ modified |= runOnMachineBasicBlock(*MFI);
+ }
+
+ DEBUG(dbgs() << "************* NaCl Rewrite Final Done *************\n");
+ return modified;
+}
+
+bool X86NaClRewriteFinalPass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool modified = false;
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI;
+ MBBI != MBB.end(); MBBI = NextMBBI) {
+ ++NextMBBI;
+ if (ApplyCommonRewrites(MBB, MBBI)) {
+ modified = true;
+ }
+ }
+ return modified;
+}
+
+// return an instance of the pass
+namespace llvm {
+ FunctionPass *createX86NaClRewriteFinalPass() {
+ return new X86NaClRewriteFinalPass();
+ }
+}
diff --git a/lib/Target/X86/X86NaClRewritePass.cpp b/lib/Target/X86/X86NaClRewritePass.cpp
new file mode 100644
index 0000000000..9b0922d2d0
--- /dev/null
+++ b/lib/Target/X86/X86NaClRewritePass.cpp
@@ -0,0 +1,869 @@
+//=== X86NaClRewritePass.cpp - Rewrite instructions for NaCl SFI --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that ensures that stores, loads, and stack/frame
+// pointer addresses are within the NaCl sandbox (for x86-64).
+// It also ensures that indirect control flow follows NaCl requirements.
+//
+// The other major portion of rewriting for NaCl is done in X86InstrNaCl.cpp,
+// which is responsible for expanding the NaCl-specific operations introduced
+// here and also the intrinsic functions to support setjmp, etc.
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "x86-sandboxing"
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+namespace {
+ class X86NaClRewritePass : public MachineFunctionPass {
+ public:
+ static char ID;
+ X86NaClRewritePass() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "NaCl Rewrites";
+ }
+
+ private:
+
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const X86Subtarget *Subtarget;
+ bool Is64Bit;
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ void TraceLog(const char *func,
+ const MachineBasicBlock &MBB,
+ const MachineBasicBlock::iterator MBBI) const;
+
+ bool ApplyRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool ApplyStackSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ bool ApplyMemorySFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ bool ApplyFrameSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ bool ApplyControlSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+
+ void PassLightWeightValidator(MachineBasicBlock &MBB);
+ bool AlignJumpTableTargets(MachineFunction &MF);
+ bool RewritePushfPopf(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator *Next);
+ };
+
+ char X86NaClRewritePass::ID = 0;
+
+}
+
+static void DumpInstructionVerbose(const MachineInstr &MI);
+
+static bool IsPushPop(MachineInstr &MI) {
+ const unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default:
+ return false;
+ case X86::PUSH64r:
+ case X86::POP64r:
+ return true;
+ }
+}
+
+static bool IsSandboxed(MachineInstr &MI);
+
+static bool IsStore(MachineInstr &MI) {
+ return MI.getDesc().mayStore();
+}
+
+static bool IsLoad(MachineInstr &MI) {
+ return MI.getDesc().mayLoad();
+}
+
+static bool IsFrameChange(MachineInstr &MI) {
+ return MI.modifiesRegister(X86::EBP, NULL) ||
+ MI.modifiesRegister(X86::RBP, NULL);
+}
+
+static bool IsStackChange(MachineInstr &MI) {
+ return MI.modifiesRegister(X86::ESP, NULL) ||
+ MI.modifiesRegister(X86::RSP, NULL);
+}
+
+
+static bool HasControlFlow(const MachineInstr &MI) {
+ return MI.getDesc().isBranch() ||
+ MI.getDesc().isCall() ||
+ MI.getDesc().isReturn() ||
+ MI.getDesc().isTerminator() ||
+ MI.getDesc().isBarrier();
+}
+
+static bool IsDirectBranch(const MachineInstr &MI) {
+ return MI.getDesc().isBranch() &&
+ !MI.getDesc().isIndirectBranch();
+}
+
+static bool IsRegAbsolute(unsigned Reg) {
+ return (Reg == X86::RSP || Reg == X86::RBP ||
+ Reg == X86::R15 || Reg == X86::RIP);
+}
+
+static bool FindMemoryOperand(const MachineInstr &MI, unsigned* index) {
+ int NumFound = 0;
+ unsigned MemOp = 0;
+ for (unsigned i = 0; i < MI.getNumOperands(); ) {
+ if (isMem(&MI, i)) {
+ NumFound++;
+ MemOp = i;
+ i += X86::AddrNumOperands;
+ } else {
+ i++;
+ }
+ }
+
+ // Intrinsics and other functions can have mayLoad and mayStore set to
+ // reflect their side effects. This function looks for explicit memory
+ // references in the instruction; such instructions simply have none.
+ if (NumFound == 0)
+ return false;
+
+ if (NumFound > 1)
+ llvm_unreachable("Too many memory operands in instruction!");
+
+ *index = MemOp;
+ return true;
+}
+
+static unsigned PromoteRegTo64(unsigned RegIn) {
+ if (RegIn == 0)
+ return 0;
+ unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i64, false);
+ assert(RegOut != 0);
+ return RegOut;
+}
+
+static unsigned DemoteRegTo32(unsigned RegIn) {
+ if (RegIn == 0)
+ return 0;
+ unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i32, false);
+ assert(RegOut != 0);
+ return RegOut;
+}
+
+
+//
+// True if this MI restores RSP from RBP with a slight adjustment offset.
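+// (i.e. it is of the form "lea disp(%rbp), %rsp")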
+//
+static bool MatchesSPAdj(const MachineInstr &MI) {
+ assert (MI.getOpcode() == X86::LEA64r && "Call to MatchesSPAdj w/ non LEA");
+ const MachineOperand &DestReg = MI.getOperand(0);
+ const MachineOperand &BaseReg = MI.getOperand(1);
+ const MachineOperand &Scale = MI.getOperand(2);
+ const MachineOperand &IndexReg = MI.getOperand(3);
+ const MachineOperand &Offset = MI.getOperand(4);
+ return (DestReg.isReg() && DestReg.getReg() == X86::RSP &&
+ BaseReg.isReg() && BaseReg.getReg() == X86::RBP &&
+ Scale.getImm() == 1 &&
+ IndexReg.isReg() && IndexReg.getReg() == 0 &&
+ Offset.isImm());
+}
+
+void
+X86NaClRewritePass::TraceLog(const char *func,
+ const MachineBasicBlock &MBB,
+ const MachineBasicBlock::iterator MBBI) const {
+ DEBUG(dbgs() << "@" << func << "(" << MBB.getName() << ", " << (*MBBI) << ")\n");
+}
+
+bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyStackSFI", MBB, MBBI);
+ assert(Is64Bit);
+ MachineInstr &MI = *MBBI;
+
+ if (!IsStackChange(MI))
+ return false;
+
+ if (IsPushPop(MI))
+ return false;
+
+ if (MI.getDesc().isCall())
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(DestReg == X86::ESP || DestReg == X86::RSP);
+
+ unsigned NewOpc = 0;
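+ // The NACL_* stack pseudo-ops carry %r15 (the sandbox base) as an extra
+ // operand; they are expanded later (see X86InstrNaCl.cpp) into bundle-locked
+ // sequences that keep %rsp inside the sandbox.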
+ switch (Opc) {
+ case X86::ADD64ri8 : NewOpc = X86::NACL_ASPi8; break;
+ case X86::ADD64ri32: NewOpc = X86::NACL_ASPi32; break;
+ case X86::SUB64ri8 : NewOpc = X86::NACL_SSPi8; break;
+ case X86::SUB64ri32: NewOpc = X86::NACL_SSPi32; break;
+ case X86::AND64ri32: NewOpc = X86::NACL_ANDSPi32; break;
+ }
+ if (NewOpc) {
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addImm(MI.getOperand(2).getImm())
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Promote "MOV ESP, EBP" to a 64-bit move
+ if (Opc == X86::MOV32rr && MI.getOperand(1).getReg() == X86::EBP) {
+ MI.getOperand(0).setReg(X86::RSP);
+ MI.getOperand(1).setReg(X86::RBP);
+ MI.setDesc(TII->get(X86::MOV64rr));
+ Opc = X86::MOV64rr;
+ }
+
+ // "MOV RBP, RSP" is already safe
+ if (Opc == X86::MOV64rr && MI.getOperand(1).getReg() == X86::RBP) {
+ return true;
+ }
+
+ // Promote 32-bit lea to 64-bit lea (does this ever happen?)
+ assert(Opc != X86::LEA32r && "Invalid opcode in 64-bit mode!");
+ if (Opc == X86::LEA64_32r) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned BaseReg = MI.getOperand(1).getReg();
+ unsigned Scale = MI.getOperand(2).getImm();
+ unsigned IndexReg = MI.getOperand(3).getReg();
+ assert(DestReg == X86::ESP);
+ assert(Scale == 1);
+ assert(BaseReg == X86::EBP);
+ assert(IndexReg == 0);
+ MI.getOperand(0).setReg(X86::RSP);
+ MI.getOperand(1).setReg(X86::RBP);
+ MI.setDesc(TII->get(X86::LEA64r));
+ Opc = X86::LEA64r;
+ }
+
+ if (Opc == X86::LEA64r && MatchesSPAdj(MI)) {
+ const MachineOperand &Offset = MI.getOperand(4);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_SPADJi32))
+ .addImm(Offset.getImm())
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPr))
+ .addReg(DemoteRegTo32(MI.getOperand(1).getReg()))
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::MOV32rm) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPm))
+ .addOperand(MI.getOperand(1)) // Base
+ .addOperand(MI.getOperand(2)) // Scale
+ .addOperand(MI.getOperand(3)) // Index
+ .addOperand(MI.getOperand(4)) // Offset
+ .addOperand(MI.getOperand(5)) // Segment
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ DumpInstructionVerbose(MI);
+ llvm_unreachable("Unhandled Stack SFI");
+}
+
+bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyFrameSFI", MBB, MBBI);
+ assert(Is64Bit);
+ MachineInstr &MI = *MBBI;
+
+ if (!IsFrameChange(MI))
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Handle moves to RBP
+ if (Opc == X86::MOV64rr) {
+ assert(MI.getOperand(0).getReg() == X86::RBP);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+
+ // MOV RBP, RSP is already safe
+ if (SrcReg == X86::RSP)
+ return false;
+
+ // Rewrite: mov %rbp, %rX
+ // To: naclrestbp %eX, %r15
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPr))
+ .addReg(DemoteRegTo32(SrcReg))
+ .addReg(X86::R15);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Handle memory moves to RBP
+ if (Opc == X86::MOV64rm) {
+ assert(MI.getOperand(0).getReg() == X86::RBP);
+
+ // Rewrite: mov %rbp, (...)
+ // To: naclrestbp (...), %r15
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm))
+ .addOperand(MI.getOperand(1)) // Base
+ .addOperand(MI.getOperand(2)) // Scale
+ .addOperand(MI.getOperand(3)) // Index
+ .addOperand(MI.getOperand(4)) // Offset
+ .addOperand(MI.getOperand(5)) // Segment
+ .addReg(X86::R15); // rZP
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Popping onto RBP
+ // Rewrite to:
+ // naclrestbp (%rsp), %r15
+ // naclasp $8, %r15
+ //
+ // TODO(pdox): Consider rewriting to this instead:
+ // .bundle_lock
+ // pop %rbp
+ // mov %ebp,%ebp
+ // add %r15, %rbp
+ // .bundle_unlock
+ if (Opc == X86::POP64r) {
+ assert(MI.getOperand(0).getReg() == X86::RBP);
+
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm))
+ .addReg(X86::RSP) // Base
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addImm(0) // Offset
+ .addReg(0) // Segment
+ .addReg(X86::R15); // rZP
+
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi8))
+ .addImm(8)
+ .addReg(X86::R15);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ DumpInstructionVerbose(MI);
+ llvm_unreachable("Unhandled Frame SFI");
+}
+
+bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyControlSFI", MBB, MBBI);
+ MachineInstr &MI = *MBBI;
+
+ if (!HasControlFlow(MI))
+ return false;
+
+ // Direct branches are OK
+ if (IsDirectBranch(MI))
+ return false;
+
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+
+ // Rewrite indirect jump/call instructions
+ unsigned NewOpc = 0;
+ switch (Opc) {
+ // 32-bit
+ case X86::JMP32r : NewOpc = X86::NACL_JMP32r; break;
+ case X86::TAILJMPr : NewOpc = X86::NACL_JMP32r; break;
+ case X86::NACL_CG_CALL32r : NewOpc = X86::NACL_CALL32r; break;
+ // 64-bit
+ case X86::NACL_CG_JMP64r : NewOpc = X86::NACL_JMP64r; break;
+ case X86::NACL_CG_CALL64r : NewOpc = X86::NACL_CALL64r; break;
+ case X86::NACL_CG_TAILJMPr64 : NewOpc = X86::NACL_JMP64r; break;
+ }
+ if (NewOpc) {
+ MachineInstrBuilder NewMI =
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addOperand(MI.getOperand(0));
+ if (Is64Bit) {
+ NewMI.addReg(X86::R15);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // EH_RETURN has a single argument which is not actually used directly.
+ // The argument gives the location where to reposition the stack pointer
+ // before returning. EmitPrologue takes care of that repositioning.
+ // So EH_RETURN just ultimately emits a plain "ret"
+ if (Opc == X86::RET || Opc == X86::EH_RETURN || Opc == X86::EH_RETURN64) {
+ // To maintain compatibility with nacl-as, for now we don't emit naclret.
+ // MI.setDesc(TII->get(Is64Bit ? X86::NACL_RET64 : X86::NACL_RET32));
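+ // Instead, rewrite "ret" as a pop of the return address into %ecx/%rcx
+ // followed by a sandboxed indirect jump (NACL_JMP32r/NACL_JMP64r).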
+ if (Is64Bit) {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), X86::RCX);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP64r))
+ .addReg(X86::ECX)
+ .addReg(X86::R15);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r))
+ .addReg(X86::ECX);
+ }
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::RETI) {
+ // To maintain compatibility with nacl-as, for now we don't emit naclret.
+ // MI.setDesc(TII->get(X86::NACL_RETI32));
+ assert(!Is64Bit);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX);
+ BuildMI(MBB, MBBI, DL, TII->get(X86::ADD32ri), X86::ESP)
+ .addReg(X86::ESP)
+ .addOperand(MI.getOperand(0));
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r))
+ .addReg(X86::ECX);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Rewrite trap
+ if (Opc == X86::TRAP) {
+ // To maintain compatibility with nacl-as, for now we don't emit nacltrap.
+ // MI.setDesc(TII->get(Is64Bit ? X86::NACL_TRAP64 : X86::NACL_TRAP32));
+ BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32mi))
+ .addReg(Is64Bit ? X86::R15 : 0) // Base
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addImm(0) // Offset
+ .addReg(0) // Segment
+ .addImm(0); // Value
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::NACL_LONGJ32 ||
+ Opc == X86::NACL_LONGJ64) {
+ // The expansions for these intrinsics already handle control SFI.
+ return false;
+ }
+
+ DumpInstructionVerbose(MI);
+ llvm_unreachable("Unhandled Control SFI");
+}
+
+//
+// Sandboxes loads and stores (64-bit only)
+//
+bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ TraceLog("ApplyMemorySFI", MBB, MBBI);
+ assert(Is64Bit);
+ MachineInstr &MI = *MBBI;
+
+ if (!IsLoad(MI) && !IsStore(MI))
+ return false;
+
+ if (IsPushPop(MI))
+ return false;
+
+ unsigned MemOp;
+ if (!FindMemoryOperand(MI, &MemOp))
+ return false;
+ assert(isMem(&MI, MemOp));
+ MachineOperand &BaseReg = MI.getOperand(MemOp + 0);
+ MachineOperand &Scale = MI.getOperand(MemOp + 1);
+ MachineOperand &IndexReg = MI.getOperand(MemOp + 2);
+ //MachineOperand &Disp = MI.getOperand(MemOp + 3);
+ MachineOperand &SegmentReg = MI.getOperand(MemOp + 4);
+
+ // Make sure the base and index are 64-bit registers.
+ IndexReg.setReg(PromoteRegTo64(IndexReg.getReg()));
+ BaseReg.setReg(PromoteRegTo64(BaseReg.getReg()));
+ assert(IndexReg.getSubReg() == 0);
+ assert(BaseReg.getSubReg() == 0);
+
+ bool AbsoluteBase = IsRegAbsolute(BaseReg.getReg());
+ bool AbsoluteIndex = IsRegAbsolute(IndexReg.getReg());
+ unsigned AddrReg = 0;
+
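+ // At most one non-absolute register may take part in the address: it stays
+ // as the index, %r15 becomes the base, and the segment slot is tagged with
+ // PSEUDO_NACL_SEG so that later expansion emits the sandboxed address form.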
+ if (AbsoluteBase && AbsoluteIndex) {
+ llvm_unreachable("Unexpected absolute register pair");
+ } else if (AbsoluteBase) {
+ AddrReg = IndexReg.getReg();
+ } else if (AbsoluteIndex) {
+ assert(!BaseReg.getReg() && "Unexpected base register");
+ assert(Scale.getImm() == 1);
+ AddrReg = 0;
+ } else {
+ assert(!BaseReg.getReg() && "Unexpected relative register pair");
+ BaseReg.setReg(X86::R15);
+ AddrReg = IndexReg.getReg();
+ }
+
+ if (AddrReg) {
+ assert(!SegmentReg.getReg() && "Unexpected segment register");
+ SegmentReg.setReg(X86::PSEUDO_NACL_SEG);
+ return true;
+ }
+
+ return false;
+}
+
+bool X86NaClRewritePass::ApplyRewrites(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+
+ // These direct jumps need their opcode rewritten
+ // and variable operands removed.
+ unsigned NewOpc = 0;
+ switch (Opc) {
+ case X86::NACL_CG_CALLpcrel32 : NewOpc = X86::NACL_CALL32d; break;
+ case X86::TAILJMPd : NewOpc = X86::JMP_4; break;
+ case X86::NACL_CG_TAILJMPd64 : NewOpc = X86::JMP_4; break;
+ case X86::NACL_CG_CALL64pcrel32: NewOpc = X86::NACL_CALL64d; break;
+ }
+ if (NewOpc) {
+ BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
+ .addOperand(MI.getOperand(0));
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (Opc == X86::NACL_CG_TLS_addr32) {
+ // Rewrite to nacltlsaddr32
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_TLS_addr32))
+ .addOperand(MI.getOperand(0)) // Base
+ .addOperand(MI.getOperand(1)) // Scale
+ .addOperand(MI.getOperand(2)) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, X86II::MO_TLSGD)
+ .addOperand(MI.getOperand(4)); // Segment
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // General Dynamic NaCl TLS model
+ // http://code.google.com/p/nativeclient/issues/detail?id=1685
+ if (Opc == X86::NACL_CG_GD_TLS_addr64) {
+
+ // Rewrite to:
+ // leaq $sym@TLSGD(%rip), %rdi
+ // naclcall __tls_get_addr@PLT
+ BuildMI(MBB, MBBI, DL, TII->get(X86::LEA64r), X86::RDI)
+ .addReg(X86::RIP) // Base
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
+ MI.getOperand(3).getTargetFlags())
+ .addReg(0); // Segment
+ BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_CALL64d))
+ .addExternalSymbol("__tls_get_addr", X86II::MO_PLT);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Local Exec NaCl TLS Model
+ if (Opc == X86::NACL_CG_LE_TLS_addr64 ||
+ Opc == X86::NACL_CG_LE_TLS_addr32) {
+ unsigned CallOpc, LeaOpc, Reg;
+ // Rewrite to:
+ // naclcall __nacl_read_tp@PLT
+ // lea $sym@flag(,%reg), %reg
+ if (Opc == X86::NACL_CG_LE_TLS_addr64) {
+ CallOpc = X86::NACL_CALL64d;
+ LeaOpc = X86::LEA64r;
+ Reg = X86::RAX;
+ } else {
+ CallOpc = X86::NACL_CALL32d;
+ LeaOpc = X86::LEA32r;
+ Reg = X86::EAX;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(CallOpc))
+ .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT);
+ BuildMI(MBB, MBBI, DL, TII->get(LeaOpc), Reg)
+ .addReg(0) // Base
+ .addImm(1) // Scale
+ .addReg(Reg) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
+ MI.getOperand(3).getTargetFlags())
+ .addReg(0); // Segment
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Initial Exec NaCl TLS Model
+ if (Opc == X86::NACL_CG_IE_TLS_addr64 ||
+ Opc == X86::NACL_CG_IE_TLS_addr32) {
+ unsigned CallOpc, AddOpc, Base, Reg;
+ // Rewrite to:
+ // naclcall __nacl_read_tp@PLT
+ // addq sym@flag(%base), %reg
+ if (Opc == X86::NACL_CG_IE_TLS_addr64) {
+ CallOpc = X86::NACL_CALL64d;
+ AddOpc = X86::ADD64rm;
+ Base = X86::RIP;
+ Reg = X86::RAX;
+ } else {
+ CallOpc = X86::NACL_CALL32d;
+ AddOpc = X86::ADD32rm;
+ Base = MI.getOperand(3).getTargetFlags() == X86II::MO_INDNTPOFF ?
+ 0 : X86::EBX; // EBX for GOTNTPOFF.
+ Reg = X86::EAX;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(CallOpc))
+ .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT);
+ BuildMI(MBB, MBBI, DL, TII->get(AddOpc), Reg)
+ .addReg(Reg)
+ .addReg(Base)
+ .addImm(1) // Scale
+ .addReg(0) // Index
+ .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
+ MI.getOperand(3).getTargetFlags())
+ .addReg(0); // Segment
+ MI.eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+// Rewrite the sequence generated to implement CopyToReg for EFLAGS, when
+// LLVM tries to keep EFLAGS live across a call to avoid emitting a CMP.
+// %r/m = <some flags-setting op>
+// pushf
+// pop %rY
+// <call>
+// push %rY
+// popf
+// <conditional branch>
+// becomes:
+// %r/m = <some flags-setting op>
+// %rY = %r/m
+// <call>
+// cmp %rY, 0
+// <conditional branch>
+// A proper fix would involve fixing X86TargetLowering::EmitTest to check
+// that the path to the flags-setting op does not chain through a call,
+// and avoid the optimization in that case.
+// BUG: http://code.google.com/p/nativeclient/issues/detail?id=2711
+
+bool X86NaClRewritePass::RewritePushfPopf(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator *Next) {
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Opc = MI.getOpcode();
+ bool Is64Bit = false;
+
+ switch(Opc) {
+ case X86::PUSHF64:
+ Is64Bit = true;
+ // fall through
+ case X86::PUSHF32: {
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ assert((*Next)->getOpcode() == (Is64Bit ? X86::POP64r : X86::POP32r)
+ && "Unknown pushf sequence");
+ // Take the destination of the flags-setting op (Prev) and move it to
+ // the destination of the pop (Next)
+ int MovOpc;
+ if (Prev->memoperands_empty()) {
+ MovOpc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
+ BuildMI(MBB, MBBI, DL, TII->get(MovOpc))
+ .addOperand((*Next)->getOperand(0))
+ .addOperand(Prev->getOperand(0));
+ } else {
+ MovOpc = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
+ // Memory operands are an operand tuple of
+ // [base,scale,index,disp,segment]
+ BuildMI(MBB, MBBI, DL, TII->get(MovOpc))
+ .addOperand((*Next)->getOperand(0))
+ .addOperand(Prev->getOperand(0))
+ .addOperand(Prev->getOperand(1))
+ .addOperand(Prev->getOperand(2))
+ .addOperand(Prev->getOperand(3))
+ .addOperand(Prev->getOperand(4))
+ .addMemOperand(*Prev->memoperands_begin());
+ }
+
+ MI.eraseFromParent();
+ // Just use Prev as a placeholder to delete the pop
+ Prev = *Next;
+ ++(*Next);
+ Prev->eraseFromParent();
+ return true;
+ }
+ case X86::POPF64:
+ Is64Bit = true;
+ // fall through
+ case X86::POPF32: {
+ int PushOpc;
+ int CmpOpc;
+ PushOpc = Is64Bit ? X86::PUSH64r : X86::PUSH32r;
+ CmpOpc = Is64Bit ? X86::CMP64ri32 : X86::CMP32ri;
+
+ MachineBasicBlock::iterator Prev = MBBI;
+ --Prev;
+ // Create a compare of the destination of the push (Prev) to 0
+ assert(Prev->getOpcode() == PushOpc && "Unknown popf sequence");
+ BuildMI(MBB, MBBI, DL, TII->get(CmpOpc))
+ .addReg(Prev->getOperand(0).getReg())
+ .addImm(0);
+ Prev->eraseFromParent();
+ MI.eraseFromParent();
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+bool X86NaClRewritePass::AlignJumpTableTargets(MachineFunction &MF) {
+ bool Modified = true;
+
+ MF.setAlignment(5); // log2, 32 = 2^5
+
+ MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+ if (JTI != NULL) {
+ const std::vector<MachineJumpTableEntry> &JT = JTI->getJumpTables();
+ for (unsigned i = 0; i < JT.size(); ++i) {
+ const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+ for (unsigned j = 0; j < MBBs.size(); ++j) {
+ MBBs[j]->setAlignment(5);
+ Modified |= true;
+ }
+ }
+ }
+ return Modified;
+}
+
+bool X86NaClRewritePass::runOnMachineFunction(MachineFunction &MF) {
+ bool Modified = false;
+
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ TRI = TM->getRegisterInfo();
+ Subtarget = &TM->getSubtarget<X86Subtarget>();
+ Is64Bit = Subtarget->is64Bit();
+
+ assert(Subtarget->isTargetNaCl() && "Unexpected target in NaClRewritePass!");
+
+ DEBUG(dbgs() << "*************** NaCl Rewrite Pass ***************\n");
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end();
+ MFI != E;
+ ++MFI) {
+ Modified |= runOnMachineBasicBlock(*MFI);
+ }
+ Modified |= AlignJumpTableTargets(MF);
+ DEBUG(dbgs() << "*************** NaCl Rewrite DONE ***************\n");
+ return Modified;
+}
+
+bool X86NaClRewritePass::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Modified = false;
+ if (MBB.hasAddressTaken()) {
+ //FIXME: use a symbolic constant or get this value from some configuration
+ MBB.setAlignment(5);
+ Modified = true;
+ }
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI;
+ MBBI != MBB.end(); MBBI = NextMBBI) {
+ ++NextMBBI;
+ // When one of these methods makes a change,
+ // it returns true, skipping the others.
+ if (ApplyRewrites(MBB, MBBI) ||
+ RewritePushfPopf(MBB, MBBI, &NextMBBI) ||
+ (Is64Bit && ApplyStackSFI(MBB, MBBI)) ||
+ (Is64Bit && ApplyMemorySFI(MBB, MBBI)) ||
+ (Is64Bit && ApplyFrameSFI(MBB, MBBI)) ||
+ ApplyControlSFI(MBB, MBBI)) {
+ Modified = true;
+ }
+ }
+ return Modified;
+}
+
+static bool IsSandboxed(MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ // 32-bit
+ case X86::NACL_TRAP32:
+ case X86::NACL_RET32:
+ case X86::NACL_RETI32:
+ case X86::NACL_JMP32r:
+ case X86::NACL_CALL32d:
+ case X86::NACL_CALL32r:
+
+ // 64-bit
+ case X86::NACL_TRAP64:
+ case X86::NACL_RET64:
+ case X86::NACL_JMP64r:
+ case X86::NACL_CALL64r:
+ case X86::NACL_CALL64d:
+
+ case X86::NACL_ASPi8:
+ case X86::NACL_ASPi32:
+ case X86::NACL_SSPi8:
+ case X86::NACL_SSPi32:
+ case X86::NACL_SPADJi32:
+ case X86::NACL_RESTSPr:
+ case X86::NACL_RESTSPm:
+ case X86::NACL_RESTBPr:
+ case X86::NACL_RESTBPm:
+ return true;
+
+ case X86::MOV64rr:
+ // copy from safe regs
+ const MachineOperand &DestReg = MI.getOperand(0);
+ const MachineOperand &SrcReg = MI.getOperand(1);
+ return DestReg.getReg() == X86::RSP && SrcReg.getReg() == X86::RBP;
+ }
+ return false;
+}
+
+static void DumpInstructionVerbose(const MachineInstr &MI) {
+ dbgs() << MI;
+ dbgs() << MI.getNumOperands() << " operands:" << "\n";
+ for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
+ const MachineOperand& op = MI.getOperand(i);
+ dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n";
+ }
+ dbgs() << "\n";
+}
+
+/// createX86NaClRewritePass - returns an instance of the pass.
+namespace llvm {
+ FunctionPass* createX86NaClRewritePass() {
+ return new X86NaClRewritePass();
+ }
+}
diff --git a/lib/Wrap/LLVMBuild.txt b/lib/Wrap/LLVMBuild.txt
new file mode 100644
index 0000000000..8750711338
--- /dev/null
+++ b/lib/Wrap/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./lib/Wrap/LLVMBuild.txt ------------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Wrap
+parent = Libraries
diff --git a/lib/Wrap/Makefile b/lib/Wrap/Makefile
new file mode 100644
index 0000000000..79aa2b3531
--- /dev/null
+++ b/lib/Wrap/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Wrap/Makefile -----------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMWrap
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Wrap/bitcode_wrapperer.cpp b/lib/Wrap/bitcode_wrapperer.cpp
new file mode 100644
index 0000000000..eeb2825793
--- /dev/null
+++ b/lib/Wrap/bitcode_wrapperer.cpp
@@ -0,0 +1,355 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#include "llvm/Wrap/bitcode_wrapperer.h"
+
+#include <stdio.h>
+#include <sys/stat.h>
+
+using std::vector;
+
+// The number of bytes in a 32 bit integer.
+static const uint32_t kWordSize = 4;
+
+// Number of LLVM-defined fixed fields in the header.
+static const uint32_t kLLVMFields = 4;
+
+// Total number of fixed fields in the header.
+static const uint32_t kFixedFields = 7;
+
+// The magic number that must exist for bitcode wrappers.
+static const uint32_t kWrapperMagicNumber = 0x0B17C0DE;
+
+// The version number associated with a wrapper file.
+// Note: llvm currently only allows the value 0. When this changes,
+// we should consider making this a command line option.
+static const uint32_t kLLVMVersionNumber = 0;
+
+// Fields defined by Android bitcode header.
+static const uint32_t kAndroidHeaderVersion = 0;
+static const uint32_t kAndroidTargetAPI = 0;
+static const uint32_t kAndroidDefaultCompilerVersion = 0;
+static const uint32_t kAndroidDefaultOptimizationLevel = 3;
+
+// PNaCl bitcode version number.
+static const uint32_t kPnaclBitcodeVersion = 0;
+
+// Max size for variable fields. Currently only used for writing them
+// out to files (the parsing works for arbitrary sizes).
+static const size_t kMaxVariableFieldSize = 256;
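+// The fixed part of the wrapper header is kFixedFields (7) words, in order:
+//   magic number, LLVM wrapper version, raw bitcode offset, raw bitcode size,
+//   Android header version, Android target API, PNaCl bitcode version;
+// it is followed by the variable-length BCHeaderFields and then the bitcode
+// itself (see WriteBitcodeWrapperHeader below).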
+
+BitcodeWrapperer::BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile)
+ : infile_(infile),
+ outfile_(outfile),
+ buffer_size_(0),
+ cursor_(0),
+ infile_at_eof_(false),
+ infile_bc_offset_(0),
+ wrapper_bc_offset_(0),
+ wrapper_bc_size_(0),
+ android_header_version_(kAndroidHeaderVersion),
+ android_target_api_(kAndroidTargetAPI),
+ pnacl_bc_version_(0),
+ error_(false) {
+ buffer_.resize(kBitcodeWrappererBufferSize);
+ if (IsInputBitcodeWrapper()) {
+ ParseWrapperHeader();
+ } else if (IsInputBitcodeFile()) {
+ wrapper_bc_offset_ = kWordSize * kFixedFields;
+ wrapper_bc_size_ = GetInFileSize();
+ } else {
+ fprintf(stderr, "Error: input file is not a bitcode file.\n");
+ error_ = true;
+ }
+}
+
+BitcodeWrapperer::~BitcodeWrapperer() {
+ for(size_t i = 0; i < variable_field_data_.size(); i++) {
+ delete [] variable_field_data_[i];
+ }
+}
+
+
+void BitcodeWrapperer::ClearBuffer() {
+ buffer_size_ = 0;
+ cursor_ = 0;
+ infile_at_eof_ = false;
+}
+
+bool BitcodeWrapperer::Seek(uint32_t pos) {
+ if (infile_ != NULL && infile_->Seek(pos)) {
+ ClearBuffer();
+ return true;
+ }
+ return false;
+}
+
+bool BitcodeWrapperer::CanReadWord() {
+ if (GetBufferUnreadBytes() < kWordSize) {
+ FillBuffer();
+ return GetBufferUnreadBytes() >= kWordSize;
+ } else {
+ return true;
+ }
+}
+
+void BitcodeWrapperer::FillBuffer() {
+ if (cursor_ > 0) {
+ // Before filling, move any remaining bytes to the
+ // front of the buffer. This allows us to assume
+ // that after the call to FillBuffer, readable
+ // data is contiguous.
+ if (cursor_ < buffer_size_) {
+ size_t i = 0;
+ while (cursor_ < buffer_size_) {
+ buffer_[i++] = buffer_[cursor_++];
+ }
+ cursor_ = 0;
+ buffer_size_ = i;
+ }
+ } else {
+ // Assume the buffer contents have been used,
+ // and we want to completely refill it.
+ buffer_size_ = 0;
+ }
+
+ // If we don't have an input, we can't refill the buffer at all.
+ if (infile_ == NULL) {
+ return;
+ }
+
+ // Now fill in remaining space.
+ size_t needed = buffer_.size() - buffer_size_;
+
+ while (buffer_.size() > buffer_size_) {
+ int actually_read = infile_->Read(&buffer_[buffer_size_], needed);
+ if (infile_->AtEof()) {
+ infile_at_eof_ = true;
+ }
+ if (actually_read) {
+ buffer_size_ += actually_read;
+ needed -= actually_read;
+ } else if (infile_at_eof_) {
+ break;
+ }
+ }
+}
+
+bool BitcodeWrapperer::ReadWord(uint32_t& word) {
+ if (!CanReadWord()) return false;
+ word = (((uint32_t) BufferLookahead(0)) << 0)
+ | (((uint32_t) BufferLookahead(1)) << 8)
+ | (((uint32_t) BufferLookahead(2)) << 16)
+ | (((uint32_t) BufferLookahead(3)) << 24);
+ cursor_ += kWordSize;
+ return true;
+}
+
+bool BitcodeWrapperer::WriteWord(uint32_t value) {
+ uint8_t buffer[kWordSize];
+ buffer[3] = (value >> 24) & 0xFF;
+ buffer[2] = (value >> 16) & 0xFF;
+ buffer[1] = (value >> 8) & 0xFF;
+ buffer[0] = (value >> 0) & 0xFF;
+ return outfile_->Write(buffer, kWordSize);
+}
+
+bool BitcodeWrapperer::WriteVariableFields() {
+ // This buffer may have to be bigger if we start using the fields
+ // for larger things.
+ uint8_t buffer[kMaxVariableFieldSize];
+ for (vector<BCHeaderField>::iterator it = header_fields_.begin();
+ it != header_fields_.end(); ++it) {
+ if (!it->Write(buffer, kMaxVariableFieldSize) ||
+ !outfile_->Write(buffer, it->GetTotalSize())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool BitcodeWrapperer::ParseWrapperHeader() {
+ // Make sure LLVM-defined fields have been parsed
+ if (!IsInputBitcodeWrapper()) return false;
+ // Check the android/pnacl fields
+ if (!ReadWord(android_header_version_) ||
+ !ReadWord(android_target_api_) || !ReadWord(pnacl_bc_version_)) {
+ fprintf(stderr, "Error: file not long enough to contain header\n");
+ return false;
+ }
+ if (pnacl_bc_version_ != kPnaclBitcodeVersion) {
+ fprintf(stderr, "Error: bad PNaCl Bitcode version\n");
+ return false;
+ }
+ int field_data_total = wrapper_bc_offset_ - kWordSize * kFixedFields;
+ if (field_data_total > 0) {
+ // Read in the variable fields. We need to allocate space for the data.
+ int field_data_read = 0;
+
+ while (field_data_read < field_data_total) {
+ FillBuffer();
+ size_t buffer_needed = BCHeaderField::GetDataSizeFromSerialized(
+ &buffer_[cursor_]);
+ if (buffer_needed > buffer_.size()) {
+ buffer_.resize(buffer_needed +
+ sizeof(BCHeaderField::FixedSubfield) * 2);
+ FillBuffer();
+ }
+ variable_field_data_.push_back(new uint8_t[buffer_needed]);
+
+ BCHeaderField field(BCHeaderField::kInvalid, 0,
+ variable_field_data_.back());
+ field.Read(&buffer_[cursor_], buffer_size_);
+ header_fields_.push_back(field);
+ size_t field_size = field.GetTotalSize();
+ cursor_ += field_size;
+ field_data_read += field_size;
+ if (field_data_read > field_data_total) {
+ // We read too much data, the header is corrupted
+ fprintf(stderr, "Error: raw bitcode offset inconsistent with "
+ "variable field data\n");
+ return false;
+ }
+ }
+ Seek(0);
+ }
+ return true;
+}
+
+bool BitcodeWrapperer::IsInputBitcodeWrapper() {
+ ResetCursor();
+ // First make sure that there are enough words (LLVM header)
+ // to peek at.
+ if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) {
+ FillBuffer();
+ if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) return false;
+ }
+
+ // Now make sure the magic number is right.
+ uint32_t first_word;
+ if ((!ReadWord(first_word)) ||
+ (kWrapperMagicNumber != first_word)) return false;
+
+ // Make sure the version is right.
+ uint32_t second_word;
+ if ((!ReadWord(second_word)) ||
+ (kLLVMVersionNumber != second_word)) return false;
+
+ // Make sure that the offset and size (for llvm) are defined.
+ uint32_t bc_offset;
+ uint32_t bc_size;
+ if (ReadWord(bc_offset) &&
+ ReadWord(bc_size)) {
+ // Before returning, save the extracted values.
+ wrapper_bc_offset_ = bc_offset;
+ infile_bc_offset_ = bc_offset;
+ wrapper_bc_size_ = bc_size;
+ return true;
+ }
+ // If reached, unable to read wrapped header.
+ return false;
+}
+
+bool BitcodeWrapperer::IsInputBitcodeFile() {
+ ResetCursor();
+ // First make sure that there are four bytes to peek at.
+ if (GetBufferUnreadBytes() < kWordSize) {
+ FillBuffer();
+ if (GetBufferUnreadBytes() < kWordSize) return false;
+ }
+ // If reached, check whether the first 4 bytes match the bitcode
+ // file magic number.
+ return (BufferLookahead(0) == 'B') &&
+ (BufferLookahead(1) == 'C') &&
+ (BufferLookahead(2) == 0xc0) &&
+ (BufferLookahead(3) == 0xde);
+}
+
+bool BitcodeWrapperer::BufferCopyInToOut(uint32_t size) {
+ while (size > 0) {
+ // Be sure buffer is non-empty before writing.
+ if (0 == buffer_size_) {
+ FillBuffer();
+ if (0 == buffer_size_) {
+ return false;
+ }
+ }
+ // copy the buffer to the output file.
+ size_t block = (buffer_size_ < size) ? buffer_size_ : size;
+ if (!outfile_->Write(&buffer_[cursor_], block)) return false;
+ size -= block;
+ buffer_size_ = 0;
+ }
+ // Be sure that there are no more bytes on the input stream.
+ FillBuffer();
+ return buffer_size_ == 0;
+}
+
+void BitcodeWrapperer::AddHeaderField(BCHeaderField* field) {
+ vector<BCHeaderField>::iterator it = header_fields_.begin();
+ for (; it != header_fields_.end(); ++it) {
+ // If this field is the same as an existing one, overwrite it.
+ if (it->getID() == field->getID()) {
+ wrapper_bc_offset_ += (field->GetTotalSize() - it->GetTotalSize());
+ *it = *field;
+ break;
+ }
+ }
+ if (it == header_fields_.end()) { // there was no match, add a new field
+ header_fields_.push_back(*field);
+ wrapper_bc_offset_ += field->GetTotalSize();
+ }
+}
+
+bool BitcodeWrapperer::WriteBitcodeWrapperHeader() {
+ return
+ // Note: This writes out the 4 word header required by llvm wrapped
+ // bitcode.
+ WriteWord(kWrapperMagicNumber) &&
+ WriteWord(kLLVMVersionNumber) &&
+ WriteWord(wrapper_bc_offset_) &&
+ WriteWord(wrapper_bc_size_) &&
+ // 2 fixed fields defined by Android
+ WriteWord(android_header_version_) &&
+ WriteWord(android_target_api_) &&
+ // PNaClBitcode version
+ WriteWord(kPnaclBitcodeVersion) &&
+ // Common variable-length fields
+ WriteVariableFields();
+}
+
+void BitcodeWrapperer::PrintWrapperHeader() {
+ if (error_) {
+ fprintf(stderr, "Error condition exists: the following"
+ "data may not be reliable\n");
+ }
+ fprintf(stderr, "Wrapper magic:\t\t%x\n", kWrapperMagicNumber);
+ fprintf(stderr, "LLVM Bitcode version:\t%d\n", kLLVMVersionNumber);
+ fprintf(stderr, "Raw bitcode offset:\t%d\n", wrapper_bc_offset_);
+ fprintf(stderr, "Raw bitcode size:\t%d\n", wrapper_bc_size_);
+ fprintf(stderr, "Android header version:\t%d\n", android_header_version_);
+ fprintf(stderr, "Android target API:\t%d\n", android_target_api_);
+ fprintf(stderr, "PNaCl bitcode version:\t%d\n", kPnaclBitcodeVersion);
+ for (size_t i = 0; i < header_fields_.size(); i++) header_fields_[i].Print();
+}
+
+bool BitcodeWrapperer::GenerateWrappedBitcodeFile() {
+ if (!error_ &&
+ WriteBitcodeWrapperHeader() &&
+ Seek(infile_bc_offset_) &&
+ BufferCopyInToOut(wrapper_bc_size_)) {
+ off_t dangling = wrapper_bc_size_ & 3;
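+ // Pad the copied bitcode with zero bytes up to a 4-byte boundary.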
+ if (dangling) {
+ return outfile_->Write((const uint8_t*) "\0\0\0\0", 4 - dangling);
+ }
+ return true;
+ }
+ return false;
+}
+
+bool BitcodeWrapperer::GenerateRawBitcodeFile() {
+ return !error_ && Seek(infile_bc_offset_) &&
+ BufferCopyInToOut(wrapper_bc_size_);
+}
diff --git a/lib/Wrap/file_wrapper_input.cpp b/lib/Wrap/file_wrapper_input.cpp
new file mode 100644
index 0000000000..fc592e0246
--- /dev/null
+++ b/lib/Wrap/file_wrapper_input.cpp
@@ -0,0 +1,53 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#include <sys/stat.h>
+#include <stdlib.h>
+
+#include "llvm/Wrap/file_wrapper_input.h"
+
+FileWrapperInput::FileWrapperInput(const std::string& name) :
+ _name(name), _at_eof(false), _size_found(false), _size(0) {
+ _file = fopen(name.c_str(), "rb");
+ if (NULL == _file) {
+ fprintf(stderr, "Unable to open: %s\n", name.c_str());
+ exit(1);
+ }
+}
+
+FileWrapperInput::~FileWrapperInput() {
+ fclose(_file);
+}
+
+size_t FileWrapperInput::Read(uint8_t* buffer, size_t wanted) {
+ size_t found = fread((char*) buffer, 1, wanted, _file);
+ if (feof(_file) || ferror(_file)) {
+ _at_eof = true;
+ }
+ return found;
+}
+
+bool FileWrapperInput::AtEof() {
+ return _at_eof;
+}
+
+off_t FileWrapperInput::Size() {
+ if (_size_found) return _size;
+ struct stat st;
+ if (0 == stat(_name.c_str(), &st)) {
+ _size_found = true;
+ _size = st.st_size;
+ return _size;
+ } else {
+ fprintf(stderr, "Unable to compute file size: %s\n", _name.c_str());
+ exit(1);
+ }
+ // NOT REACHABLE.
+ return 0;
+}
+
+bool FileWrapperInput::Seek(uint32_t pos) {
+ return 0 == fseek(_file, (long) pos, SEEK_SET);
+}
diff --git a/lib/Wrap/file_wrapper_output.cpp b/lib/Wrap/file_wrapper_output.cpp
new file mode 100644
index 0000000000..f9f126868d
--- /dev/null
+++ b/lib/Wrap/file_wrapper_output.cpp
@@ -0,0 +1,37 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+
+#include "llvm/Wrap/file_wrapper_output.h"
+#include <stdlib.h>
+
+
+FileWrapperOutput::FileWrapperOutput(const std::string& name)
+ : _name(name) {
+ _file = fopen(name.c_str(), "wb");
+ if (NULL == _file) {
+ fprintf(stderr, "Unable to open: %s\n", name.c_str());
+ exit(1);
+ }
+}
+
+FileWrapperOutput::~FileWrapperOutput() {
+ fclose(_file);
+}
+
+bool FileWrapperOutput::Write(uint8_t byte) {
+ return EOF != fputc(byte, _file);
+}
+
+bool FileWrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) {
+ if (!buffer) {
+ return false;
+ }
+
+ if (buffer_size > 0) {
+ return buffer_size == fwrite(buffer, 1, buffer_size, _file);
+ } else {
+ return true;
+ }
+}
diff --git a/lib/Wrap/wrapper_output.cpp b/lib/Wrap/wrapper_output.cpp
new file mode 100644
index 0000000000..493f29efa8
--- /dev/null
+++ b/lib/Wrap/wrapper_output.cpp
@@ -0,0 +1,9 @@
+#include "llvm/Wrap/wrapper_output.h"
+
+bool WrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) {
+ // Default implementation that uses the byte write routine.
+ for (size_t i = 0; i < buffer_size; ++i) {
+ if (!Write(buffer[i])) return false;
+ }
+ return true;
+}
diff --git a/tools/bc-wrap/LLVMBuild.txt b/tools/bc-wrap/LLVMBuild.txt
new file mode 100644
index 0000000000..b515fc04b9
--- /dev/null
+++ b/tools/bc-wrap/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/bc-wrap/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = bc-wrap
+parent = Tools
+required_libraries = Wrap all-targets
diff --git a/tools/bc-wrap/Makefile b/tools/bc-wrap/Makefile
new file mode 100644
index 0000000000..dccff2ecde
--- /dev/null
+++ b/tools/bc-wrap/Makefile
@@ -0,0 +1,20 @@
+##===- tools/bc-wrap/Makefile ---------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TOOLNAME = bc-wrap
+
+# Include this here so we can get the configuration of the targets
+# that have been configured for construction. We have to do this
+# early so we can set up LINK_COMPONENTS before including Makefile.rules
+include $(LEVEL)/Makefile.config
+
+LINK_COMPONENTS := $(TARGETS_TO_BUILD) Wrap
+
+include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/bc-wrap/bc_wrap.cpp b/tools/bc-wrap/bc_wrap.cpp
new file mode 100644
index 0000000000..5311f714ee
--- /dev/null
+++ b/tools/bc-wrap/bc_wrap.cpp
@@ -0,0 +1,123 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+ */
+/*
+ * Utility to wrap a .bc file, using LLVM standard + custom headers.
+ */
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Wrap/bitcode_wrapperer.h"
+#include "llvm/Wrap/file_wrapper_input.h"
+#include "llvm/Wrap/file_wrapper_output.h"
+
+#include <ctype.h>
+#include <string.h>
+
+using namespace llvm;
+
+static cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input file>"), cl::Required);
+
+static cl::opt<std::string>
+OutputFilename("o", cl::desc("<output file>"));
+
+static cl::opt<bool> UnwrapFlag("u",
+ cl::desc("unwrap rather than wrap the file"),
+ cl::init(false));
+
+static cl::opt<bool> VerboseFlag("v",
+ cl::desc("print verbose header information"),
+ cl::init(false));
+
+static cl::opt<bool> DryRunFlag("n",
+ cl::desc("Dry run (implies -v)"),
+ cl::init(false));
+
+// Accept the hash on the command line, to avoid having to include a SHA-1
+// library with the LLVM code.
+static cl::opt<std::string> BitcodeHash("hash",
+ cl::desc("Hash of bitcode (ignored if -u is given)"));
+
+const int kMaxBinaryHashLen = 32;
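+// 32 bytes = 256 bits, enough to hold the SHA-2 hash mentioned below, so the
+// -hash argument may be at most 64 hex characters.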
+
+// Convert the ASCII hex hash to a binary hash. Returns the buffer and its length.
+// The caller must free the returned buffer.
+static uint8_t* ParseBitcodeHash(int* len) {
+ if (BitcodeHash.size() > kMaxBinaryHashLen * 2 ||
+ BitcodeHash.size() % 2) return NULL;
+ *len = BitcodeHash.size() / 2;
+ uint8_t* buf = new uint8_t[*len];
+ const char* arg = BitcodeHash.data();
+ for (size_t i = 0; i < BitcodeHash.size() / 2; i++) {
+ unsigned int r; // glibc has %hhx but it's nonstandard
+ if (!isxdigit(*(arg + 2 * i + 1)) || // sscanf ignores trailing junk
+ !sscanf(arg + 2 * i, "%2x", &r) ||
+ r > std::numeric_limits<uint8_t>::max()) {
+ delete [] buf;
+ return NULL;
+ }
+ buf[i] = static_cast<uint8_t>(r);
+ }
+ return buf;
+}
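+// Worked example (illustrative): "-hash=1a2b" yields *len == 2 and a buffer
+// containing {0x1a, 0x2b}; "-hash=1g2b" fails the isxdigit() check and
+// returns NULL, as does any odd-length or over-long (> 64 character) string.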
+
+int main(const int argc, const char* argv[]) {
+ bool success = true;
+ cl::ParseCommandLineOptions(argc, argv, "bitcode wrapper/unwrapper\n");
+ if (OutputFilename == "") {
+ // Default to input file = output file. The cl lib doesn't seem to
+ // directly support initializing one opt from another.
+ OutputFilename = InputFilename;
+ }
+ if (DryRunFlag) VerboseFlag = true;
+ sys::fs::file_status outfile_status;
+ std::string outfile_temp;
+ outfile_temp = std::string(OutputFilename) + ".temp";
+ if (UnwrapFlag) {
+ FileWrapperInput inbc(InputFilename);
+ FileWrapperOutput outbc(outfile_temp);
+ BitcodeWrapperer wrapperer(&inbc, &outbc);
+ if (wrapperer.IsInputBitcodeWrapper()) {
+ if (VerboseFlag) {
+ fprintf(stderr, "Headers read from infile:\n");
+ wrapperer.PrintWrapperHeader();
+ }
+ if (DryRunFlag)
+ return 0;
+ success = wrapperer.GenerateRawBitcodeFile();
+ }
+ } else {
+ FileWrapperInput inbc(InputFilename);
+ FileWrapperOutput outbc(outfile_temp);
+ BitcodeWrapperer wrapperer(&inbc, &outbc);
+ if (BitcodeHash.size()) {
+ // SHA-2 hash is 256 bit
+ int hash_len;
+ uint8_t* buf = ParseBitcodeHash(&hash_len);
+ if (!buf) {
+ fprintf(stderr, "Bitcode hash must be a hex string <= 64 chars.\n");
+ exit(1);
+ }
+ BCHeaderField hash(BCHeaderField::kBitcodeHash, hash_len, buf);
+ wrapperer.AddHeaderField(&hash);
+ }
+ if (VerboseFlag) {
+ fprintf(stderr, "Headers generated:\n");
+ wrapperer.PrintWrapperHeader();
+ }
+ if (DryRunFlag)
+ return 0;
+ success = wrapperer.GenerateWrappedBitcodeFile();
+ }
+ error_code ec;
+ if ((ec = sys::fs::rename(outfile_temp, OutputFilename))) {
+ fprintf(stderr, "Could not rename temporary: %s\n", ec.message().c_str());
+ success = false;
+ }
+ if (success) return 0;
+ fprintf(stderr, "error: Unable to generate a proper %s bitcode file!\n",
+ (UnwrapFlag ? "unwrapped" : "wrapped"));
+ return 1;
+}
diff --git a/tools/llc/ELFStub.h b/tools/llc/ELFStub.h
new file mode 100644
index 0000000000..a79fecff0f
--- /dev/null
+++ b/tools/llc/ELFStub.h
@@ -0,0 +1,55 @@
+// This file describes a simple high-level representation of an ELF stub.
+
+#ifndef __ELF_STUB_H
+#define __ELF_STUB_H
+
+#include <llvm/Support/ELF.h>
+#include <llvm/ADT/StringMap.h>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+struct SymbolStub;
+struct VersionDefinition;
+
+using ELF::Elf32_Half;
+
+struct ELFStub {
+ Elf32_Half Machine;
+ std::string SOName;
+ std::vector<SymbolStub> Symbols;
+ std::vector<VersionDefinition> VerDefs;
+
+ // These are used for constructing the version definitions.
+ // They are not directly emitted to the ELF stub.
+ StringMap<Elf32_Half> IndexMap; // Maps version name to version index.
+ Elf32_Half NextIndex; // Next available version index
+};
+
+
+// Dynamic symbol entries
+struct SymbolStub {
+ // Symbol Table info.
+ std::string Name;
+ unsigned char Type; // STT_*
+ unsigned char Binding; // STB_*
+ unsigned char Visibility; // STV_*
+ ELF::Elf32_Word Size; // Guess for st_size.
+ // st_value, etc. are stubbed out.
+
+ // Version info matching each of the symbols.
+ Elf32_Half VersionIndex; // vd_ndx
+ bool IsDefault;
+};
+
+// Versions defined in this module
+struct VersionDefinition {
+ Elf32_Half Index; // vd_ndx
+ bool IsWeak; // TODO(pdox): Implement this (for vd_flags)
+ std::string Name; // for vda_name, etc.
+ std::vector<std::string> Parents; // TODO(pdox): Implement this
+};
+
+}
+#endif
diff --git a/tools/llc/SRPCStreamer.cpp b/tools/llc/SRPCStreamer.cpp
new file mode 100644
index 0000000000..3eaa7c17c6
--- /dev/null
+++ b/tools/llc/SRPCStreamer.cpp
@@ -0,0 +1,116 @@
+//===-- SRPCStreamer.cpp - Stream bitcode over SRPC ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(__native_client__) && defined(NACL_SRPC)
+#define DEBUG_TYPE "bitcode-stream"
+#include "SRPCStreamer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <errno.h>
+
+using llvm::dbgs;
+
+size_t QueueStreamer::GetBytes(unsigned char *buf, size_t len) {
+ pthread_mutex_lock(&Mutex);
+ while (!Done && queueSize() < len) {
+    DEBUG(dbgs() << "QueueStreamer::GetBytes len " << len << " size " <<
+          queueSize() << " waiting\n");
+ pthread_cond_wait(&Cond, &Mutex);
+ }
+ if (Done && queueSize() < len) len = queueSize();
+ queueGet(buf, len);
+ pthread_mutex_unlock(&Mutex);
+ return len;
+}
+
+size_t QueueStreamer::PutBytes(unsigned char *buf, size_t len) {
+ pthread_mutex_lock(&Mutex);
+ queuePut(buf, len);
+ pthread_cond_signal(&Cond);
+ pthread_mutex_unlock(&Mutex);
+ return len;
+}
+
+void QueueStreamer::SetDone() {
+ // Still need the lock to avoid signaling between the check and
+ // the wait in GetBytes.
+ pthread_mutex_lock(&Mutex);
+ Done = true;
+ pthread_cond_signal(&Cond);
+ pthread_mutex_unlock(&Mutex);
+}
+
+// Called with Mutex held to protect Cons, Prod, and Bytes
+void QueueStreamer::queuePut(unsigned char *buf, size_t len) {
+ while (capacityRemaining() < len) {
+ int leftover = Bytes.size() - Cons;
+ DEBUG(dbgs() << "resizing " << leftover << " " << Prod << " " <<
+ Cons << "\n");
+ Bytes.resize(Bytes.size() * 2);
+ if (Cons > Prod) {
+ // There are unread bytes left between Cons and the previous end of the
+ // buffer. Move them to the new end of the buffer.
+ memmove(&Bytes[Bytes.size() - leftover], &Bytes[Cons], leftover);
+ Cons = Bytes.size() - leftover;
+ }
+ }
+ size_t EndSpace = std::min(len, Bytes.size() - Prod);
+ DEBUG(dbgs() << "put, len " << len << " Endspace " << EndSpace << " p " <<
+ Prod << " c " << Cons << "\n");
+ // Copy up to the end of the buffer
+ memcpy(&Bytes[Prod], buf, EndSpace);
+ // Wrap around if necessary
+ memcpy(&Bytes[0], buf + EndSpace, len - EndSpace);
+ Prod = (Prod + len) % Bytes.size();
+}
+
+// Called with Mutex held to protect Cons, Prod, and Bytes
+void QueueStreamer::queueGet(unsigned char *buf, size_t len) {
+ assert(len <= queueSize());
+ size_t EndSpace = std::min(len, Bytes.size() - Cons);
+ DEBUG(dbgs() << "get, len " << len << " Endspace " << EndSpace << " p " <<
+ Prod << " c " << Cons << "\n");
+ // Copy up to the end of the buffer
+ memcpy(buf, &Bytes[Cons], EndSpace);
+ // Wrap around if necessary
+ memcpy(buf + EndSpace, &Bytes[0], len - EndSpace);
+ Cons = (Cons + len) % Bytes.size();
+}
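+// Worked example of the wrap-around arithmetic above (illustrative only):
+// suppose Bytes.size() == 8, Prod == 6 and Cons == 3, so queueSize() == 3.
+// queueGet(buf, 3) copies Bytes[3..5] (EndSpace == 3, no wrap) and leaves
+// Cons == 6. A following queuePut() of 4 bytes writes Bytes[6..7] and then
+// Bytes[0..1] (EndSpace == 2), leaving Prod == (6 + 4) % 8 == 2.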
+
+llvm::DataStreamer *SRPCStreamer::init(void *(*Callback)(void *), void *arg,
+ std::string *ErrMsg) {
+ int err = pthread_create(&CompileThread, NULL, Callback, arg);
+ if (err) {
+    // pthread_create returns the error code directly; it does not set errno.
+    if (ErrMsg) *ErrMsg = std::string(strerror(err));
+ return NULL;
+ }
+ return &Q;
+}
+
+size_t SRPCStreamer::gotChunk(unsigned char *bytes, size_t len) {
+ if (Error) return 0;
+ return Q.PutBytes(bytes, len);
+}
+
+int SRPCStreamer::streamEnd(std::string *ErrMsg) {
+ Q.SetDone();
+ int err = pthread_join(CompileThread, NULL);
+ if (err) {
+    // pthread_join also returns the error code directly rather than via errno.
+    if (ErrMsg) *ErrMsg = std::string(strerror(err));
+ return err;
+ }
+ if (Error && ErrMsg) *ErrMsg = std::string("compile failed.");
+ return Error;
+}
+
+#endif
diff --git a/tools/llc/SRPCStreamer.h b/tools/llc/SRPCStreamer.h
new file mode 100644
index 0000000000..a326d9276d
--- /dev/null
+++ b/tools/llc/SRPCStreamer.h
@@ -0,0 +1,93 @@
+//===-- SRPCStreamer.cpp - Stream bitcode over SRPC ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SRPCSTREAMER_H
+#define SRPCSTREAMER_H
+
+#include <pthread.h>
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <vector>
+#include "llvm/Support/DataStream.h"
+
+// Implements LLVM's interface for fetching data from a stream source.
+// Bitcode bytes from the RPC thread are placed here with PutBytes and buffered
+// until the bitcode reader calls GetBytes to remove them.
+class QueueStreamer : public llvm::DataStreamer {
+ public:
+ QueueStreamer() : Done(false), Prod(0), Cons(0) {
+ pthread_mutex_init(&Mutex, NULL);
+ pthread_cond_init(&Cond, NULL);
+ Bytes.resize(64 * 1024);
+ }
+ // Called by the compilation thread. Wait for len bytes to become available,
+ // and copy them into buf. If all bytes have been received and there are
+ // fewer than len bytes available, copy all remaining bytes.
+ // Return the number of bytes copied.
+ virtual size_t GetBytes(unsigned char *buf, size_t len);
+
+ // Called by the RPC thread. Copy len bytes from buf and wake up the
+ // compilation thread if it is waiting. Return the number of bytes copied.
+ size_t PutBytes(unsigned char *buf, size_t len);
+
+ // Called by the RPC thread. Signal that all bytes have been received,
+ // so the last call to GetBytes will return the remaining bytes rather
+  // than waiting for the entire requested amount.
+ void SetDone();
+
+ private:
+ bool Done;
+ pthread_mutex_t Mutex;
+ pthread_cond_t Cond;
+
+ // Variables and functions to manage the circular queue
+ std::vector<unsigned char> Bytes;
+ size_t Prod; // Queue producer index
+ size_t Cons; // Queue consumer index
+ size_t queueSize() {
+ return Prod >= Cons ? Prod - Cons : Bytes.size() - (Cons - Prod);
+ }
+ size_t capacityRemaining() {
+ return (Prod >= Cons ? Bytes.size() - (Prod - Cons) : (Cons - Prod)) - 1;
+ }
+ void queuePut(unsigned char *buf, size_t len);
+ void queueGet(unsigned char *buf, size_t len);
+};
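+// Illustrative usage sketch (not part of this interface): the RPC thread is
+// the producer and the compilation thread is the consumer of a shared
+// QueueStreamer. Names below are hypothetical.
+//
+//   QueueStreamer Q;                      // shared between the two threads
+//   // RPC thread, once per received chunk:
+//   Q.PutBytes(chunk, chunk_len);
+//   // RPC thread, after the last chunk has been delivered:
+//   Q.SetDone();
+//   // Compilation thread (the bitcode reader):
+//   size_t got = Q.GetBytes(buf, want);   // may return < want only after
+//                                         // SetDone() has been called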
+
+// Class to manage the compilation thread and serve as the interface from
+// the SRPC thread.
+class SRPCStreamer {
+public:
+ SRPCStreamer() : Error(false) {}
+  // Initialize the streamer, create a new thread running Callback, and
+  // return a pointer to the DataStreamer the threads will use to
+  // synchronize. On error, return NULL and fill in the ErrMsg string.
+ llvm::DataStreamer *init(void *(*Callback)(void *),
+ void *arg, std::string *ErrMsg);
+ // Called by the RPC thread. Copy len bytes from buf. Return bytes copied.
+ size_t gotChunk(unsigned char *bytes, size_t len);
+ // Called by the RPC thread. Wait for the compilation thread to finish.
+ int streamEnd(std::string *ErrMsg);
+ // Called by the compilation thread. Signal that there was a compilation
+ // error so the RPC thread can abort the stream.
+ void setError() { Error = true; }
+private:
+ bool Error;
+ QueueStreamer Q;
+ pthread_t CompileThread;
+};
+
+
+
+#endif // SRPCSTREAMER_H
diff --git a/tools/llc/StubMaker.cpp b/tools/llc/StubMaker.cpp
new file mode 100644
index 0000000000..cc343280a3
--- /dev/null
+++ b/tools/llc/StubMaker.cpp
@@ -0,0 +1,233 @@
+// Create a high-level representation of the needed library.
+
+#include "StubMaker.h"
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "ELFStub.h"
+
+using namespace llvm;
+
+// Extract the Name, Version, and IsDefault flag from the FullName string.
+// e.g. foo@V1 --> foo, V1, false
+// bar@@V2 --> bar, V2, true
+static void ExtractVersion(StringRef FullName,
+ StringRef &Name,
+ StringRef &Version,
+ bool &IsDefault) {
+ size_t atpos = FullName.find('@');
+ if (atpos == StringRef::npos) {
+ Name = FullName;
+ Version = "";
+ IsDefault = false;
+ return;
+ }
+ Name = FullName.substr(0, atpos);
+ ++atpos;
+ if (FullName[atpos] == '@') {
+ IsDefault = true;
+ ++atpos;
+ } else {
+ IsDefault = false;
+ }
+ Version = FullName.substr(atpos);
+}
+
+
+// This implicitly creates a version record as a result of locating a symbol
+// with this version. There is normally more information attached to a
+// version definition: the parent version(s) and definition flags (weak
+// or base). This information is currently not stored in the bitcode
+// module. It may be necessary to add this in the future.
+static Elf32_Half AddVersionDef(ELFStub *Stub, StringRef Name) {
+ VersionDefinition VD;
+ VD.Name = Name;
+ VD.Index = Stub->NextIndex++;
+ VD.IsWeak = false; // TODO(pdox): Implement
+ VD.Parents.clear(); // TODO(pdox): Implement
+ Stub->VerDefs.push_back(VD);
+ Stub->IndexMap[VD.Name] = VD.Index;
+ return VD.Index;
+}
+
+static Elf32_Half GetVersionIndex(StringRef Version, ELFStub *Stub) {
+ // Handle unversioned symbols
+ if (Version.empty())
+ return 1; /* ELF::VER_NDX_GLOBAL */
+ // Find the version definition, if it already exists.
+ StringMap<Elf32_Half>::const_iterator I = Stub->IndexMap.find(Version);
+ if (I != Stub->IndexMap.end()) {
+ return I->second;
+ }
+ // If not, create it.
+ return AddVersionDef(Stub, Version);
+}
+
+static Elf32_Half GetELFMachine(const Triple &T) {
+ switch (T.getArch()) {
+ default: llvm_unreachable("Unknown target triple in StubMaker.cpp");
+ case Triple::x86_64: return ELF::EM_X86_64;
+ case Triple::x86: return ELF::EM_386;
+ case Triple::arm: return ELF::EM_ARM;
+ case Triple::mipsel: return ELF::EM_MIPS;
+ }
+}
+
+static unsigned char GetELFVisibility(const GlobalValue *GV) {
+ switch (GV->getVisibility()) {
+ case GlobalValue::DefaultVisibility: return ELF::STV_DEFAULT;
+ case GlobalValue::HiddenVisibility: return ELF::STV_HIDDEN;
+ case GlobalValue::ProtectedVisibility: return ELF::STV_PROTECTED;
+ }
+  llvm_unreachable("Unknown visibility in GetELFVisibility");
+}
+
+static ELF::Elf32_Word GetElfSizeForType(const GlobalValue *GV,
+ const Type *ElemType) {
+ unsigned bit_size = ElemType->getPrimitiveSizeInBits();
+ if (bit_size != 0) {
+ // Check against 0 to see if it was actually a primitive.
+ return bit_size / 8;
+ }
+ if (isa<PointerType>(ElemType)) {
+ // Pointers are 32-bit for NaCl.
+ return 4;
+ }
+ if (isa<FunctionType>(ElemType)) {
+ // This is not a data object, so just say unknown (0).
+ return 0;
+ }
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(ElemType)) {
+ unsigned elem_size = GetElfSizeForType(GV, ATy->getElementType());
+ unsigned num_elems = ATy->getNumElements();
+ // TODO(jvoung): Come up with a test for what to do with 0-length arrays.
+ // Not sure what to do here actually. It may be that the 0-length
+ // array is meant to be an opaque type, which you can never check the
+ // "sizeof". For now, return 0 instead of asserting.
+ // Known instance of this in library code is in basic_string.h:
+ // static size_type _S_empty_rep_storage[];
+ return elem_size * num_elems;
+ }
+ if (const VectorType *VTy = dyn_cast<VectorType>(ElemType)) {
+ unsigned bit_width = VTy->getBitWidth();
+ if (bit_width) {
+ return bit_width / 8;
+ } else {
+ // It's a vector of pointers, and pointers are 32-bit in NaCl
+ return VTy->getNumElements() * 4;
+ }
+ }
+ if (const StructType *STy = dyn_cast<StructType>(ElemType)) {
+ // Alignment padding should have been added to the type in the front-end.
+ unsigned size_so_far = 0;
+ for (unsigned i = 0; i < STy->getNumElements(); ++i) {
+ size_so_far += GetElfSizeForType(GV, STy->getElementType(i));
+ }
+ return size_so_far;
+ }
+ // Unknown type!
+ DEBUG({
+ dbgs() << "Unknown GetELFSize for var=";
+ GV->dump();
+ dbgs() << " type= ";
+ ElemType->dump();
+ dbgs() << "\n";
+ });
+ llvm_unreachable("Unhandled type for GetELFSize");
+ return 0;
+}
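+// Worked examples for GetElfSizeForType (illustrative): i32 -> 4,
+// [8 x i16] -> 16, i8* -> 4 (pointers are 32-bit for NaCl),
+// <4 x i8*> -> 16 (vector of pointers, 4 bytes each), and
+// {i32, i8*, [2 x i64]} -> 4 + 4 + 16 = 24 (fields are simply summed;
+// the front-end is expected to have made any padding explicit).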
+
+// Return a value for the symbol table's st_size, which is the number of bytes
+// in a data object. Functions are not data objects, so they may report an
+// unknown size of 0.
+// This is known to be important for symbols that may sit in BSS
+// with copy relocations (to know how much to copy).
+static ELF::Elf32_Word GetELFSize(const GlobalValue *GV) {
+ const class PointerType *PT = GV->getType();
+ const Type *ElemType = PT->getElementType();
+ return GetElfSizeForType(GV, ElemType);
+}
+
+static unsigned char GetELFType(const GlobalValue *GV) {
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ return GVar->isThreadLocal() ? ELF::STT_TLS : ELF::STT_OBJECT;
+ } else if (isa<Function>(GV)) {
+ // TODO(pdox): Handle STT_GNU_IFUNC
+ return ELF::STT_FUNC;
+ }
+ // TODO(pdox): Do we need to resolve GlobalAliases?
+ llvm_unreachable("Unknown GlobalValue type in GetELFType!");
+}
+
+static unsigned char GetELFBinding(const GlobalValue *GV) {
+ // TODO(pdox):
+ // This information would ideally be made to match the symbol binding
+ // as declared in the original shared object. However, GV is only the
+ // declaration for this symbol, so we cannot derive the definition's
+ // binding here. But it seems like it should be fine to always set it to
+ // STB_GLOBAL, since we already know this symbol is the prevailing
+ // definition.
+ return ELF::STB_GLOBAL;
+}
+
+static void MakeOneStub(const Module &M,
+ const Module::NeededRecord &NR,
+ ELFStub *Stub) {
+ Stub->SOName = NR.DynFile;
+ Stub->NextIndex = 2; // 0,1 are reserved
+ for (unsigned j = 0; j < NR.Symbols.size(); ++j) {
+ StringRef FullName = NR.Symbols[j];
+ GlobalValue *GV = M.getNamedValue(FullName);
+ if (!GV) {
+ // The symbol may have been removed by optimization or dead code
+ // elimination, so this is not an error.
+ continue;
+ }
+ StringRef Name;
+ StringRef Version;
+ bool IsDefault;
+ ExtractVersion(FullName, Name, Version, IsDefault);
+
+ SymbolStub SS;
+ SS.Name = Name;
+ SS.Type = GetELFType(GV);
+ SS.Binding = GetELFBinding(GV);
+ SS.Visibility = GetELFVisibility(GV);
+ SS.Size = GetELFSize(GV);
+ SS.VersionIndex = GetVersionIndex(Version, Stub);
+ SS.IsDefault = IsDefault;
+ Stub->Symbols.push_back(SS);
+ }
+}
+
+namespace llvm {
+
+// For module M, make all the stubs needed and insert them into StubList.
+void MakeAllStubs(const Module &M, const Triple &T,
+ SmallVectorImpl<ELFStub*> *StubList) {
+ std::vector<Module::NeededRecord> NRList;
+ M.getNeededRecords(&NRList);
+ Elf32_Half Machine = GetELFMachine(T);
+ for (unsigned i = 0; i < NRList.size(); ++i) {
+ const Module::NeededRecord &NR = NRList[i];
+ ELFStub *Stub = new ELFStub();
+ Stub->Machine = Machine;
+ MakeOneStub(M, NR, Stub);
+ StubList->push_back(Stub);
+ }
+}
+
+void FreeStubList(llvm::SmallVectorImpl<ELFStub*> *StubList) {
+ for (unsigned i = 0; i < StubList->size(); ++i) {
+ delete (*StubList)[i];
+ }
+ StubList->clear();
+}
+
+} // namespace
diff --git a/tools/llc/StubMaker.h b/tools/llc/StubMaker.h
new file mode 100644
index 0000000000..27e1e55d7f
--- /dev/null
+++ b/tools/llc/StubMaker.h
@@ -0,0 +1,20 @@
+#ifndef __STUB_MAKER_H
+#define __STUB_MAKER_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class Module;
+class Triple;
+class ELFStub;
+
+// For module M, make all required ELF stubs and insert them into StubList.
+void MakeAllStubs(const Module &M,
+ const Triple &T,
+ SmallVectorImpl<ELFStub*> *StubList);
+void FreeStubList(SmallVectorImpl<ELFStub*> *StubList);
+
+}
+
+#endif
diff --git a/tools/llc/TextStubWriter.cpp b/tools/llc/TextStubWriter.cpp
new file mode 100644
index 0000000000..ae6e2f77d3
--- /dev/null
+++ b/tools/llc/TextStubWriter.cpp
@@ -0,0 +1,84 @@
+// Using the high-level representation of an ELF stub, create a text version
+// of the ELF stub object.
+
+#include "TextStubWriter.h"
+
+#include <sstream>
+
+#include "ELFStub.h"
+#include "llvm/Support/ELF.h"
+
+using namespace llvm;
+
+namespace {
+
+std::string LibShortname(const std::string &fullname) {
+ std::string result = fullname;
+ if (result.find("lib") != std::string::npos) {
+ result = result.substr(3);
+ }
+ size_t so_pos = result.find(".so");
+ if (so_pos != std::string::npos) {
+ result = result.substr(0, so_pos);
+ }
+ return result;
+}
+
+const ELF::Elf32_Half kDummyCodeShndx = 5;
+const ELF::Elf32_Half kDummyDataShndx = 6;
+
+} // namespace
+
+namespace llvm {
+
+// Write out the dynamic symbol table information. The format must be kept
+// in sync with the changes in NaCl's version of gold (see gold/metadata.cc).
+void WriteTextELFStub(const ELFStub *Stub, std::string *output) {
+ std::stringstream ss;
+
+ ss << "#### Symtab for " << Stub->SOName << "\n";
+ ss << "@obj " << LibShortname(Stub->SOName) << " " << Stub->SOName << "\n";
+
+  // st_value is usually a relative address in .so and .exe files,
+  // so make some up.
+ ELF::Elf32_Addr fake_relative_addr = 0;
+ for (size_t i = 0; i < Stub->Symbols.size(); ++i) {
+ const SymbolStub &sym = Stub->Symbols[i];
+
+ ELF::Elf32_Addr st_value = fake_relative_addr;
+ ELF::Elf32_Word st_size = sym.Size;
+ unsigned int st_info = sym.Type | (sym.Binding << 4);
+ unsigned int st_other = sym.Visibility;
+ ELF::Elf32_Half st_shndx = sym.Type == ELF::STT_FUNC ?
+ kDummyCodeShndx : kDummyDataShndx;
+ ELF::Elf32_Half vd_ndx = sym.VersionIndex;
+ // Mark non-default versions hidden.
+ if (!sym.IsDefault) {
+ vd_ndx |= ELF::VERSYM_HIDDEN;
+ }
+
+ ss << "@sym "
+ << sym.Name << " " // Representative for st_name.
+ << (st_value) << " "
+ << (st_size) << " "
+ << (st_info) << " "
+ << (st_other) << " "
+ << (st_shndx) << " "
+ << (vd_ndx) << " "
+ << "\n";
+ fake_relative_addr += (sym.Size == 0 ? 4 : sym.Size);
+ }
+
+ // Now dump the version map.
+ ss << "#### VerDefs for " << Stub->SOName << "\n";
+ for (size_t i = 0; i < Stub->VerDefs.size(); ++i) {
+ const VersionDefinition &verdef = Stub->VerDefs[i];
+ ss << "@ver " << (Elf32_Half)(verdef.Index) << " " << verdef.Name << "\n";
+ }
+
+ ss << "\n";
+
+ output->append(ss.str());
+}
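+// Illustrative output sketch (hypothetical library "libfoo.so" exporting a
+// default-versioned function bar@@V1; values follow the ELF constants used
+// above: STT_FUNC | (STB_GLOBAL << 4) == 18, STV_DEFAULT == 0, code shndx 5,
+// and the first version definition gets index 2):
+//
+//   #### Symtab for libfoo.so
+//   @obj foo libfoo.so
+//   @sym bar 0 0 18 0 5 2
+//   #### VerDefs for libfoo.so
+//   @ver 2 V1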
+
+} // namespace llvm
diff --git a/tools/llc/TextStubWriter.h b/tools/llc/TextStubWriter.h
new file mode 100644
index 0000000000..4dbc5978b2
--- /dev/null
+++ b/tools/llc/TextStubWriter.h
@@ -0,0 +1,12 @@
+#ifndef __TEXT_STUB_WRITER_H
+#define __TEXT_STUB_WRITER_H
+
+#include "ELFStub.h"
+
+namespace llvm {
+
+void WriteTextELFStub(const ELFStub *Stub, std::string *output);
+
+}
+
+#endif
diff --git a/tools/llc/nacl_file.cpp b/tools/llc/nacl_file.cpp
new file mode 100644
index 0000000000..af0e8625df
--- /dev/null
+++ b/tools/llc/nacl_file.cpp
@@ -0,0 +1,548 @@
+/* Copyright 2012 The Native Client Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can
+ * be found in the LICENSE file.
+
+ * This file provides wrappers to lseek(2), read(2), etc. that read bytes from
+ * an mmap()'ed buffer. There are two steps required:
+ *   1. Use linker aliasing to wrap lseek(), etc. This is done in the
+ *      Makefile using the "-Xlinker --wrap -Xlinker lseek" arguments to
+ *      nacl-gcc. Note that this makes *all* calls to things like read() go
+ *      through these wrappers, so if you also need to read() from, say, a
+ *      socket, this code will not work as-is.
+ *   2. Use lseek(), read(), etc. as you normally would for a file.
+ *
+ * Note: This code is very temporary and will disappear when the Pepper 2 API
+ * is available in Native Client.
+ */
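+/* Illustrative sketch of step 1 above (not part of this file): with
+ * "-Xlinker --wrap -Xlinker read", the linker resolves calls to read() to
+ * __wrap_read() and makes the original entry point available as
+ * __real_read(). A hypothetical wrapper would look roughly like:
+ *
+ *   extern "C" ssize_t __real_read(int fd, void *buf, size_t count);
+ *   extern "C" ssize_t __wrap_read(int fd, void *buf, size_t count) {
+ *     // Serve wrapped descriptors from the in-memory file image;
+ *     // fall through to the real read() for everything else.
+ *     return __real_read(fd, buf, count);
+ *   }
+ */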
+
+#if defined(__native_client__) && defined(NACL_SRPC)
+
+#include <argz.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/nacl_syscalls.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <nacl/nacl_srpc.h>
+#ifdef __pnacl__
+#include <nacl/pnacl.h>
+#endif
+#include "SRPCStreamer.h"
+
+
+#include <string>
+#include <map>
+#include <vector>
+
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+
+using llvm::MemoryBuffer;
+using llvm::StringRef;
+using std::string;
+using std::map;
+
+#define MMAP_PAGE_SIZE (64 * 1024)
+#define MMAP_ROUND_MASK (MMAP_PAGE_SIZE - 1)
+#define printerr(...) fprintf(stderr, __VA_ARGS__)
+#define printdbg(...)
+
+#define ARRAY_SIZE(array) (sizeof array / sizeof array[0])
+
+namespace {
+
+typedef std::vector<std::string> string_vector;
+
+// True if the bitcode to be compiled is for a shared library.
+// Used to return to the coordinator.
+bool g_bitcode_is_shared_library;
+// The soname of the current compilation unit, if it is a shared library.
+// Empty string otherwise.
+std::string* g_bitcode_soname = NULL;
+// The newline separated list of libraries that the current bitcode compilation
+// unit depends on.
+std::string* g_bitcode_lib_dependencies = NULL;
+// The filename used internally for looking up the bitcode file.
+char kBitcodeFilename[] = "pnacl.pexe";
+// The filename used internally for looking up the object code file.
+char kObjectFilename[] = "pnacl.o";
+// Object which manages streaming bitcode over SRPC and threading.
+SRPCStreamer *srpc_streamer;
+
+static size_t roundToNextPageSize(size_t size) {
+ size_t count_up = size + (MMAP_ROUND_MASK);
+ return (count_up & ~(MMAP_ROUND_MASK));
+}
+
+} // namespace
+
+// TODO(dschuff): Find a more elegant interface into llc than this?
+extern llvm::DataStreamer* NaClBitcodeStreamer;
+
+class FileInfo {
+ private:
+ static map<string, FileInfo*> descriptor_map_;
+
+ string filename_;
+ int fd_;
+ int size_;
+
+ public:
+ // Construct a FileInfo for a file descriptor.
+ // File descriptors are used for the bitcode (input) file and for the
+ // object (output) file passed in by the coordinator when using the Run
+ // SRPC.
+ // They are also used to represent an association with a shared memory
+ // region. In this case the initial fd_ is -1, representing that the shared
+ // memory is not yet created. Once data is ready to write, the size is
+ // computed and a shared memory descriptor is stored in fd_.
+ FileInfo(string fn, int fd) :
+ filename_(fn), fd_(fd), size_(-1) {
+ printdbg("DBG: registering file %d (%s) %d\n", fd, fn.c_str(), size_);
+ descriptor_map_[fn] = this;
+ if (fd >= 0) {
+ struct stat stb;
+ int result = fstat(fd_, &stb);
+ if (result != 0) {
+ printerr("ERROR: cannot stat %d (%s)\n", fd, fn.c_str());
+ }
+      size_ = stb.st_size;
+ }
+ }
+
+ int GetSize() {
+ if (fd_ < 0) {
+ printerr("ERROR: file has not been initialized!\n");
+ }
+ return size_;
+ }
+
+ int GetFd() {
+ return fd_;
+ }
+
+ MemoryBuffer* ReadAllDataAsMemoryBuffer() {
+ printdbg("DBG: reading file %d (%s): %d bytes\n",
+ fd_, filename_.c_str(), size_);
+
+ const int count_up = roundToNextPageSize(size_);
+    char *buf = (char *) mmap(NULL, count_up, PROT_READ, MAP_SHARED, fd_, 0);
+    // mmap reports failure with MAP_FAILED, not NULL.
+    if (MAP_FAILED == buf) {
+      printerr("ERROR: mmap call failed!\n");
+      return NULL;
+    }
+
+ printdbg("after mapping %p %d\n", buf, size_);
+ // This copies the data into a new buffer
+ MemoryBuffer* mb = MemoryBuffer::getMemBufferCopy(StringRef(buf, size_));
+ munmap(buf, count_up);
+ printdbg("after unmapping %p %d\n",
+ mb->getBufferStart(), mb->getBufferSize());
+ return mb;
+ }
+
+ void WriteAllDataToTmpFile(string data) {
+ printdbg("DBG: writing file %d (%s): %d bytes\n",
+ fd_, filename_.c_str(), data.size());
+
+ if (fd_ < 0) {
+ printerr("ERROR: invalid fd for write\n");
+ return;
+ }
+ size_t bytes_to_write = data.size();
+ const char* buf = data.c_str();
+ while (bytes_to_write > 0) {
+ ssize_t bytes_written = write(fd_, (const void*) buf, bytes_to_write);
+ if (bytes_written < 0) {
+ printerr("ERROR: write failed\n");
+ return;
+ }
+ buf += bytes_written;
+ bytes_to_write -= (size_t) bytes_written;
+ }
+ }
+
+ // TODO(sehr): remove this method once switched to using the Run SRPC.
+ void WriteAllDataToShmem(string data) {
+ printdbg("DBG: writing file %d (%s): %d bytes\n",
+ fd_, filename_.c_str(), data.size());
+
+ if (fd_ >= 0) {
+ printerr("ERROR: cannot write file twice\n");
+ return;
+ }
+ const int count_up = roundToNextPageSize(data.size());
+ const int fd = imc_mem_obj_create(count_up);
+ if (fd < 0) {
+ printerr("ERROR: imc_mem_obj_create failed\n");
+ return;
+ }
+
+    char* buf = (char *) mmap(NULL, count_up, PROT_WRITE, MAP_SHARED, fd, 0);
+    // mmap reports failure with MAP_FAILED, not NULL.
+    if (MAP_FAILED == buf) {
+      printerr("ERROR: cannot map shm for write\n");
+      return;
+    }
+
+ memcpy(buf, data.c_str(), data.size());
+ munmap(buf, count_up);
+ fd_ = fd;
+ size_ = data.size();
+ }
+
+ void WriteAllData(string data) {
+ if (fd_ < 0) {
+ WriteAllDataToShmem(data);
+ } else {
+ WriteAllDataToTmpFile(data);
+ }
+ }
+
+ static FileInfo* FindFileInfo(const string& fn) {
+ map<string, FileInfo*>::iterator it = descriptor_map_.find(fn);
+ if (it == descriptor_map_.end()) {
+ printerr("ERROR: no mapping for filename\n");
+ return NULL;
+ }
+ return it->second;
+ }
+};
+
+map<string, FileInfo*> FileInfo::descriptor_map_;
+
+extern int llc_main(int argc, char **argv);
+
+
+MemoryBuffer* NaClGetMemoryBufferForFile(const char* filename) {
+ FileInfo* fi = FileInfo::FindFileInfo(filename);
+ if (fi == NULL) {
+ printerr("ERROR: unknown file %s\n", filename);
+ return NULL;
+ }
+ return fi->ReadAllDataAsMemoryBuffer();
+}
+
+void NaClOutputStringToFile(const char* filename, const string& data) {
+ FileInfo* fi = FileInfo::FindFileInfo(filename);
+  if (fi == NULL) {
+    printerr("ERROR: unknown file %s\n", filename);
+    return;
+  }
+  fi->WriteAllData(data);
+}
+
+void NaClRecordObjectInformation(bool is_shared, const std::string& soname) {
+ // This function is invoked to begin recording library information.
+ // To make it reentrant, we clean up what might be left over from last time.
+ delete g_bitcode_soname;
+ delete g_bitcode_lib_dependencies;
+ // Then remember the module global information.
+ g_bitcode_is_shared_library = is_shared;
+ g_bitcode_soname = new std::string(soname);
+ g_bitcode_lib_dependencies = new std::string();
+}
+
+void NaClRecordSharedLibraryDependency(const std::string& library_name) {
+  const std::string kDelimiterString("\n");
+ *g_bitcode_lib_dependencies += (library_name + kDelimiterString);
+}
+
+namespace {
+
+int DoTranslate(string_vector* cmd_line_vec, int bitcode_fd, int object_fd) {
+ if (cmd_line_vec == NULL) {
+ return 1;
+ }
+ if (bitcode_fd) {
+ // Add mapping for bitcode file (side effect is to register the file).
+ new FileInfo(kBitcodeFilename, bitcode_fd);
+ }
+ // Add mapping for object file (side effect is to register the file).
+ new FileInfo(kObjectFilename, object_fd);
+ // Make an argv array from the input vector.
+  size_t argc = cmd_line_vec->size();
+  // Allocate one extra slot for the terminating NULL stored below.
+  char** argv = new char*[argc + 1];
+ for (size_t i = 0; i < argc; ++i) {
+ // llc_main will not mutate the command line, so this is safe.
+ argv[i] = const_cast<char*>((*cmd_line_vec)[i].c_str());
+ }
+ argv[argc] = NULL;
+ // Call main.
+ return llc_main(static_cast<int>(argc), argv);
+}
+
+string_vector* CommandLineFromArgz(char* str, size_t str_len) {
+ char* entry = str;
+ string_vector* vec = new string_vector;
+ while (entry != NULL) {
+ vec->push_back(entry);
+ entry = argz_next(str, str_len, entry);
+ }
+ // Add fixed arguments to the command line. These specify the bitcode
+ // and object code filenames, removing them from the contract with the
+ // coordinator.
+ vec->push_back(kBitcodeFilename);
+ vec->push_back("-o");
+ vec->push_back(kObjectFilename);
+ return vec;
+}
+
+void run(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done) {
+ NaClSrpcClosureRunner runner(done);
+ rpc->result = NACL_SRPC_RESULT_APP_ERROR;
+ int bitcode_fd = in_args[0]->u.hval;
+ int object_fd = in_args[1]->u.hval;
+ char* command_line = in_args[2]->arrays.carr;
+ size_t command_line_len = in_args[2]->u.count;
+ string_vector* cmd_line_vec =
+ CommandLineFromArgz(command_line, command_line_len);
+ if (DoTranslate(cmd_line_vec, bitcode_fd, object_fd) != 0) {
+ printerr("DoTranslate failed.\n");
+ return;
+ }
+ delete cmd_line_vec;
+ out_args[0]->u.ival = g_bitcode_is_shared_library;
+ // SRPC deletes the strings returned when the closure is invoked.
+ // Therefore we need to use strdup.
+ out_args[1]->arrays.str = strdup(g_bitcode_soname->c_str());
+ out_args[2]->arrays.str = strdup(g_bitcode_lib_dependencies->c_str());
+ rpc->result = NACL_SRPC_RESULT_OK;
+}
+
+string_vector* GetDefaultCommandLine() {
+ string_vector* command_line = new string_vector;
+ size_t i;
+ // First, those common to all architectures.
+ static const char* common_args[] = { "pnacl_translator",
+ "-filetype=obj",
+ kBitcodeFilename,
+ "-o",
+ kObjectFilename };
+ for (i = 0; i < ARRAY_SIZE(common_args); ++i) {
+ command_line->push_back(common_args[i]);
+ }
+ // Then those particular to a platform.
+ static const char* llc_args_x8632[] = { "-march=x86",
+ "-mcpu=pentium4",
+ "-mtriple=i686-none-nacl-gnu",
+ NULL };
+ static const char* llc_args_x8664[] = { "-march=x86-64",
+ "-mcpu=core2",
+ "-mtriple=x86_64-none-nacl-gnu",
+ NULL };
+ static const char* llc_args_arm[] = { "-mcpu=cortex-a8",
+ "-mtriple=armv7a-none-nacl-gnueabi",
+ "-arm-reserve-r9",
+ "-sfi-disable-cp",
+ "-sfi-store",
+ "-sfi-load",
+ "-sfi-stack",
+ "-sfi-branch",
+ "-sfi-data",
+ "-no-inline-jumptables",
+ "-float-abi=hard",
+ NULL };
+
+ const char **llc_args = NULL;
+#if defined (__pnacl__)
+ switch (__builtin_nacl_target_arch()) {
+ case PnaclTargetArchitectureX86_32: {
+ llc_args = llc_args_x8632;
+ break;
+ }
+ case PnaclTargetArchitectureX86_64: {
+ llc_args = llc_args_x8664;
+ break;
+ }
+ case PnaclTargetArchitectureARM_32: {
+ llc_args = llc_args_arm;
+ break;
+ }
+ default:
+ printerr("no target architecture match.\n");
+ delete command_line;
+ command_line = NULL;
+ break;
+ }
+#elif defined (__i386__)
+ llc_args = llc_args_x8632;
+#elif defined (__x86_64__)
+ llc_args = llc_args_x8664;
+#else
+#error
+#endif
+ for (i = 0; llc_args[i] != NULL; i++) command_line->push_back(llc_args[i]);
+ return command_line;
+}
+
+void run_with_default_command_line(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done) {
+ NaClSrpcClosureRunner runner(done);
+ rpc->result = NACL_SRPC_RESULT_APP_ERROR;
+ int bitcode_fd = in_args[0]->u.hval;
+ int object_fd = in_args[1]->u.hval;
+ string_vector* cmd_line_vec = GetDefaultCommandLine();
+ if (DoTranslate(cmd_line_vec, bitcode_fd, object_fd) != 0) {
+ printerr("DoTranslate failed.\n");
+ return;
+ }
+ delete cmd_line_vec;
+ out_args[0]->u.ival = g_bitcode_is_shared_library;
+ // SRPC deletes the strings returned when the closure is invoked.
+ // Therefore we need to use strdup.
+ out_args[1]->arrays.str = strdup(g_bitcode_soname->c_str());
+ out_args[2]->arrays.str = strdup(g_bitcode_lib_dependencies->c_str());
+ rpc->result = NACL_SRPC_RESULT_OK;
+}
+
+// Data passed from main thread to compile thread.
+// Takes ownership of the commandline vector.
+class StreamingThreadData {
+ public:
+ StreamingThreadData(int object_fd, string_vector* cmd_line_vec) :
+ object_fd_(object_fd), cmd_line_vec_(cmd_line_vec) {}
+ int ObjectFD() const { return object_fd_; }
+ string_vector* CmdLineVec() const { return cmd_line_vec_.get(); }
+ const int object_fd_;
+ const llvm::OwningPtr<string_vector> cmd_line_vec_;
+};
+
+void *run_streamed(void *arg) {
+ StreamingThreadData* data = reinterpret_cast<StreamingThreadData*>(arg);
+ data->CmdLineVec()->push_back("-streaming-bitcode");
+ if (DoTranslate(data->CmdLineVec(), 0, data->ObjectFD()) != 0) {
+ printerr("DoTranslate failed.\n");
+ srpc_streamer->setError();
+ return NULL;
+ }
+ delete data;
+ return NULL;
+}
+
+// Actually do the work for stream initialization.
+void do_stream_init(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done,
+ string_vector* command_line_vec) {
+ NaClSrpcClosureRunner runner(done);
+ rpc->result = NACL_SRPC_RESULT_APP_ERROR;
+ srpc_streamer = new SRPCStreamer();
+ std::string StrError;
+ StreamingThreadData* thread_data = new StreamingThreadData(
+ in_args[0]->u.hval, command_line_vec);
+ NaClBitcodeStreamer = srpc_streamer->init(run_streamed,
+ reinterpret_cast<void *>(thread_data),
+ &StrError);
+ if (NaClBitcodeStreamer) {
+ rpc->result = NACL_SRPC_RESULT_OK;
+ out_args[0]->arrays.str = strdup("no error");
+ } else {
+ out_args[0]->arrays.str = strdup(StrError.c_str());
+ }
+}
+
+// Invoked by the StreamInit RPC to initialize bitcode streaming over SRPC.
+// Under the hood it forks a new thread that starts llc_main, which sets
+// up the compilation and blocks when it tries to start reading the bitcode.
+// Input arg is a file descriptor to write the output object file to.
+// Returns a string, containing an error message if the call fails.
+void stream_init(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done) {
+ // cmd_line_vec allocated by GetDefaultCommandLine() is freed by the
+ // translation thread in run_streamed()
+ do_stream_init(rpc, in_args, out_args, done, GetDefaultCommandLine());
+}
+
+// Invoked by StreamInitWithCommandLine RPC. Same as stream_init, but
+// provides a command line to use instead of the default.
+void stream_init_with_command_line(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done) {
+ char* command_line = in_args[1]->arrays.carr;
+ size_t command_line_len = in_args[1]->u.count;
+ string_vector* cmd_line_vec =
+ CommandLineFromArgz(command_line, command_line_len);
+ // cmd_line_vec is freed by the translation thread in run_streamed
+ do_stream_init(rpc, in_args, out_args, done, cmd_line_vec);
+}
+
+// Invoked by the StreamChunk RPC. Receives a chunk of the bitcode and
+// buffers it for later retrieval by the compilation thread.
+void stream_chunk(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done) {
+ NaClSrpcClosureRunner runner(done);
+ rpc->result = NACL_SRPC_RESULT_APP_ERROR;
+ size_t len = in_args[0]->u.count;
+ unsigned char *bytes = reinterpret_cast<unsigned char*>(
+ in_args[0]->arrays.carr);
+ if (srpc_streamer->gotChunk(bytes, len) != len) {
+ return;
+ }
+ rpc->result = NACL_SRPC_RESULT_OK;
+}
+
+// Invoked by the StreamEnd RPC. Waits until the compilation finishes,
+// then returns. Returns an int indicating whether the bitcode is a
+// shared library, a string with the soname, a string with dependencies,
+// and a string which contains an error message if applicable.
+void stream_end(NaClSrpcRpc *rpc,
+ NaClSrpcArg **in_args,
+ NaClSrpcArg **out_args,
+ NaClSrpcClosure *done) {
+ NaClSrpcClosureRunner runner(done);
+ rpc->result = NACL_SRPC_RESULT_APP_ERROR;
+ std::string StrError;
+ if (srpc_streamer->streamEnd(&StrError)) {
+ out_args[3]->arrays.str = strdup(StrError.c_str());
+ return;
+ }
+ out_args[0]->u.ival = g_bitcode_is_shared_library;
+ // SRPC deletes the strings returned when the closure is invoked.
+ // Therefore we need to use strdup.
+ out_args[1]->arrays.str = strdup(g_bitcode_soname->c_str());
+ out_args[2]->arrays.str = strdup(g_bitcode_lib_dependencies->c_str());
+ rpc->result = NACL_SRPC_RESULT_OK;
+}
+
+const struct NaClSrpcHandlerDesc srpc_methods[] = {
+ { "Run:hhC:iss", run },
+ { "RunWithDefaultCommandLine:hh:iss", run_with_default_command_line },
+ // Protocol for streaming:
+ // (StreamInit(obj_fd) -> error_str |
+  //   StreamInitWithCommandLine(obj_fd, escaped_cmdline) -> error_str)
+ // StreamChunk(data) +
+ // StreamEnd() -> (is_shared_lib,soname,dependencies,error_str)
+ { "StreamInit:h:s", stream_init },
+ { "StreamInitWithCommandLine:hC:s:", stream_init_with_command_line },
+ { "StreamChunk:C:", stream_chunk },
+ { "StreamEnd::isss", stream_end },
+ { NULL, NULL },
+};
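+// Illustrative call sequence for the streaming protocol above, as issued by
+// the client side (hypothetical pseudo-code; the coordinator lives outside
+// this file):
+//
+//   StreamInit(obj_fd)      -> "no error"   (or an error string)
+//   StreamChunk(bytes)                      (repeated once per chunk)
+//   StreamEnd()             -> (is_shared_lib, soname, dependencies, error)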
+
+} // namespace
+
+int
+main() {
+ if (!NaClSrpcModuleInit()) {
+ return 1;
+ }
+
+ if (!NaClSrpcAcceptClientConnection(srpc_methods)) {
+ return 1;
+ }
+ NaClSrpcModuleFini();
+ return 0;
+}
+
+#endif
diff --git a/tools/pso-stub/CMakeLists.txt b/tools/pso-stub/CMakeLists.txt
new file mode 100644
index 0000000000..4b2f779cb0
--- /dev/null
+++ b/tools/pso-stub/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS bitreader bitwriter object support analysis)
+
+add_llvm_tool(pso-stub
+ pso-stub.cpp
+ )
diff --git a/tools/pso-stub/LLVMBuild.txt b/tools/pso-stub/LLVMBuild.txt
new file mode 100644
index 0000000000..e643053dbf
--- /dev/null
+++ b/tools/pso-stub/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./tools/pso-stub/LLVMBuild.txt ---------------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Tool
+name = pso-stub
+parent = Tools
+required_libraries = BitReader BitWriter Object Support Analysis
diff --git a/tools/pso-stub/Makefile b/tools/pso-stub/Makefile
new file mode 100644
index 0000000000..c2860e65f6
--- /dev/null
+++ b/tools/pso-stub/Makefile
@@ -0,0 +1,18 @@
+##===- tools/pso-stub/Makefile -----------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../..
+TOOLNAME := pso-stub
+LINK_COMPONENTS := bitreader bitwriter object support analysis
+
+# This tool has no plugins, optimize startup time.
+TOOL_NO_EXPORTS := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/tools/pso-stub/pso-stub.cpp b/tools/pso-stub/pso-stub.cpp
new file mode 100644
index 0000000000..45d5347e22
--- /dev/null
+++ b/tools/pso-stub/pso-stub.cpp
@@ -0,0 +1,307 @@
+/*===- pso-stub.cpp - Create bitcode shared object stubs -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Create a bitcode stub for a native shared object.
+// Usage: pso-stub <input.so> -o <output.pso>
+//
+// The stub bitcode file contains the same dynamic symbols as the input shared
+// object, with identical attributes (e.g. weak, undefined, TLS).
+//
+// Undefined functions become declarations in the bitcode.
+// Undefined variables become external variable declarations in the bitcode.
+// Defined functions become trivial stub functions in the bitcode (which do
+// nothing but "ret void").
+// Defined object/tls symbols become dummy variable definitions (int foo = 0).
+//
+// The generated bitcode is suitable for linking against (as a shared object),
+// but nothing else.
+//
+// TODO(pdox): Implement GNU symbol versioning.
+// TODO(pdox): Mark IFUNC symbols as functions, and store
+// this attribute as metadata.
+//===----------------------------------------------------------------------===*/
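+// Illustrative example (hypothetical input): for a libfoo.so that defines
+// the function foo() and the variable bar, and references an undefined
+// function baz(), the generated stub module is roughly:
+//
+//   define void @foo() { ret void }
+//   @bar = global i32 0
+//   declare void @baz()
+//   @llvm.used = appending global [1 x i8*]
+//                [i8* bitcast (void ()* @baz to i8*)], section "llvm.metadata"
+//
+// Only declarations are retained in llvm.used; see AddUsedGlobal() below.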
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Type.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Constant.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/APInt.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+
+cl::opt<std::string>
+InputFilename(cl::Positional, cl::desc("<input native shared object>"),
+ cl::init(""));
+
+cl::opt<std::string>
+OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename"));
+
+// Variables / declarations to place in llvm.used array.
+std::vector<GlobalValue*> LLVMUsed;
+
+void AddUsedGlobal(GlobalValue *GV) {
+ // Clang normally asserts that these are not decls. We do need
+ // decls to survive though, and those are really the ones we
+ // worry about, so only add those.
+ // We run verifyModule() below, so that we know this is somewhat valid.
+ if (GV->isDeclaration()) {
+ LLVMUsed.push_back(GV);
+ }
+}
+
+// Emit llvm.used array.
+// This is almost exactly like clang/lib/CodeGen/CodeGenModule.cpp::EmitLLVMUsed
+void EmitLLVMUsed(Module *M) {
+ // Don't create llvm.used if there is no need.
+ if (LLVMUsed.empty())
+ return;
+
+ Type *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
+ // Convert LLVMUsed to what ConstantArray needs.
+ SmallVector<llvm::Constant*, 8> UsedArray;
+ UsedArray.resize(LLVMUsed.size());
+ for (unsigned i = 0, e = LLVMUsed.size(); i != e; ++i) {
+ UsedArray[i] =
+ llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*LLVMUsed[i]),
+ Int8PtrTy);
+ }
+
+ if (UsedArray.empty())
+ return;
+ llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size());
+
+ llvm::GlobalVariable *GV =
+ new llvm::GlobalVariable(*M, ATy, false,
+ llvm::GlobalValue::AppendingLinkage,
+ llvm::ConstantArray::get(ATy, UsedArray),
+ "llvm.used");
+
+ GV->setSection("llvm.metadata");
+}
+
+// Add a stub function definition or declaration
+void
+AddFunction(Module *M,
+ GlobalValue::LinkageTypes Linkage,
+ const StringRef &Name,
+ bool isDefine) {
+ // Create an empty function with no arguments.
+ // void Name(void);
+ Type *RetTy = Type::getVoidTy(M->getContext());
+ FunctionType *FT = FunctionType::get(RetTy, /*isVarArg=*/ false);
+ Function *F = Function::Create(FT, Linkage, Name, M);
+ if (isDefine) {
+ // Add a single basic block with "ret void"
+ BasicBlock *BB = BasicBlock::Create(F->getContext(), "", F);
+ BB->getInstList().push_back(ReturnInst::Create(F->getContext()));
+ }
+ AddUsedGlobal(F);
+}
+
+// Add a stub global variable declaration or definition.
+void
+AddGlobalVariable(Module *M,
+ GlobalValue::LinkageTypes Linkage,
+ const StringRef &Name,
+ bool isTLS,
+ bool isDefine) {
+ // Use 'int' as the dummy type.
+ Type *Ty = Type::getInt32Ty(M->getContext());
+
+ Constant *InitVal = NULL;
+ if (isDefine) {
+ // Define to dummy value, 0.
+ InitVal = Constant::getIntegerValue(Ty, APInt(32, 0));
+ }
+ GlobalVariable *GV =
+ new GlobalVariable(*M, Ty, /*isConstant=*/ false,
+ Linkage, /*Initializer=*/ InitVal,
+ Twine(Name), /*InsertBefore=*/ NULL, isTLS,
+ /*AddressSpace=*/ 0);
+ AddUsedGlobal(GV);
+}
+
+// Iterate through the ObjectFile's needed libraries, and
+// add them to the module.
+void TransferLibrariesNeeded(Module *M, const ObjectFile *obj) {
+ library_iterator it = obj->begin_libraries_needed();
+ library_iterator ie = obj->end_libraries_needed();
+ error_code ec;
+ for (; it != ie; it.increment(ec)) {
+ StringRef path;
+ it->getPath(path);
+ outs() << "Adding library " << path << "\n";
+ M->addLibrary(path);
+ }
+}
+
+// Set the Module's SONAME from the ObjectFile
+void TransferLibraryName(Module *M, const ObjectFile *obj) {
+ StringRef soname = obj->getLoadName();
+ outs() << "Setting soname to: " << soname << "\n";
+ M->setSOName(soname);
+}
+
+// Create stubs in the module for the dynamic symbols
+void TransferDynamicSymbols(Module *M, const ObjectFile *obj) {
+ // Iterate through the dynamic symbols in the ObjectFile.
+ symbol_iterator it = obj->begin_dynamic_symbols();
+ symbol_iterator ie = obj->end_dynamic_symbols();
+ error_code ec;
+ for (; it != ie; it.increment(ec)) {
+ const SymbolRef &sym = *it;
+ StringRef Name;
+ SymbolRef::Type Type;
+ uint32_t Flags;
+
+ sym.getName(Name);
+ sym.getType(Type);
+ sym.getFlags(Flags);
+
+ // Ignore debug info and section labels
+ if (Flags & SymbolRef::SF_FormatSpecific)
+ continue;
+
+ // Ignore local symbols
+ if (!(Flags & SymbolRef::SF_Global))
+ continue;
+ outs() << "Transferring symbol " << Name << "\n";
+
+ bool isFunc = (Type == SymbolRef::ST_Function);
+ bool isUndef = (Flags & SymbolRef::SF_Undefined);
+ bool isTLS = (Flags & SymbolRef::SF_ThreadLocal);
+ bool isCommon = (Flags & SymbolRef::SF_Common);
+ bool isWeak = (Flags & SymbolRef::SF_Weak);
+
+ if (Type == SymbolRef::ST_Unknown) {
+      // Weak symbols can be reported as "v" by nm, which are definitely
+      // data, but they may also be "w", which are of unknown type.
+      // Thus there is already a mechanism to say "weak object", but not
+      // "weak function". Assume unknown weak symbols are functions.
+ if (isWeak) {
+ outs() << "Warning: Symbol '" << Name <<
+ "' has unknown type (weak). Assuming function.\n";
+ Type = SymbolRef::ST_Function;
+ isFunc = true;
+ } else {
+ // If it is undef, we likely don't care, since it won't be used
+ // to bind to unresolved symbols in the real pexe and real pso.
+ // Other cases seen where it is not undef: _end, __bss_start,
+ // which are markers provided by the linker scripts.
+ outs() << "Warning: Symbol '" << Name <<
+ "' has unknown type (isUndef=" << isUndef << "). Assuming data.\n";
+ Type = SymbolRef::ST_Data;
+ isFunc = false;
+ }
+ }
+
+ // Determine Linkage type.
+ GlobalValue::LinkageTypes Linkage;
+ if (isWeak)
+ Linkage = isUndef ? GlobalValue::ExternalWeakLinkage :
+ GlobalValue::WeakAnyLinkage;
+ else if (isCommon)
+ Linkage = GlobalValue::CommonLinkage;
+ else
+ Linkage = GlobalValue::ExternalLinkage;
+
+ if (isFunc)
+ AddFunction(M, Linkage, Name, !isUndef);
+ else
+ AddGlobalVariable(M, Linkage, Name, isTLS, !isUndef);
+ }
+}
+
+} // namespace
+
+
+int main(int argc, const char** argv) {
+ sys::PrintStackTraceOnErrorSignal();
+ PrettyStackTraceProgram X(argc, argv);
+ LLVMContext &Context = getGlobalContext();
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+
+ cl::ParseCommandLineOptions(argc, argv,
+ "Portable Shared Object Stub Maker\n");
+
+ if (InputFilename.empty()) {
+ errs() << "Please specify an input filename\n";
+ return 1;
+ }
+ if (OutputFilename.empty()) {
+ errs() << "Please specify an output filename with -o\n";
+ return 1;
+ }
+
+ // Open the object file
+ OwningPtr<MemoryBuffer> File;
+ if (MemoryBuffer::getFile(InputFilename, File)) {
+ errs() << InputFilename << ": Open failed\n";
+ return 1;
+ }
+
+ ObjectFile *obj = ObjectFile::createObjectFile(File.take());
+  if (!obj) {
+    errs() << InputFilename << ": Object type not recognized\n";
+    return 1;
+  }
+
+ // Create the new module
+ OwningPtr<Module> M(new Module(InputFilename, Context));
+
+ // Transfer the relevant ELF information
+ M->setOutputFormat(Module::SharedOutputFormat);
+ TransferLibrariesNeeded(M.get(), obj);
+ TransferLibraryName(M.get(), obj);
+ TransferDynamicSymbols(M.get(), obj);
+ EmitLLVMUsed(M.get());
+
+ // Verify the module
+ std::string Err;
+ if (verifyModule(*M.get(), ReturnStatusAction, &Err)) {
+ errs() << "Module created is invalid:\n";
+ errs() << Err;
+ return 1;
+ }
+
+ // Write the module to a file
+ std::string ErrorInfo;
+ OwningPtr<tool_output_file> Out(
+ new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+ raw_fd_ostream::F_Binary));
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << '\n';
+ return 1;
+ }
+ WriteBitcodeToFile(M.get(), Out->os());
+ Out->keep();
+ return 0;
+}