476 files changed, 36501 insertions, 690 deletions
@@ -43,6 +43,11 @@ EXTRA_DIST := test unittests llvm.spec include win32 Xcode include $(LEVEL)/Makefile.config +ifeq ($(NACL_SANDBOX),1) + DIRS := $(filter-out tools/llvm-shlib runtime docs unittests, $(DIRS)) + OPTIONAL_DIRS := +endif + ifneq ($(ENABLE_SHARED),1) DIRS := $(filter-out tools/llvm-shlib, $(DIRS)) endif @@ -121,6 +126,7 @@ cross-compile-build-tools: fi; \ ($(MAKE) -C BuildTools \ BUILD_DIRS_ONLY=1 \ + NACL_SANDBOX=0 \ UNIVERSAL= \ UNIVERSAL_SDK_PATH= \ SDKROOT= \ diff --git a/Makefile.rules b/Makefile.rules index f0c542b7f8..060bda3358 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -657,6 +657,23 @@ else endif endif +ifeq ($(NACL_SANDBOX),1) + # NOTE: we specify --noirt to tell the driver that we should link + # against private (non-stable, non-IRT) libraries for the + # sandboxed translator. This could have been specified directly, + # except that LLVM slips in -lpthread elsewhere in the build system, + # and we need it to use -lpthread_private instead. + LIBS += -Wl,--noirt -lsrpc -limc_syscalls -lplatform -lgio -lpthread \ + -lm -lnacl -lnacl_dyncode -lnosys + ifeq ($(USE_TCMALLOC),1) + # Note: -ltcmalloc_minimal needs to stay last on the link line + LIBS += -ltcmalloc_minimal + CXX.Flags += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + C.Flags += -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free + endif +else + LIBS += +endif #---------------------------------------------------------- # Options To Invoke Tools @@ -1243,8 +1260,10 @@ endif $(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(SharedLibDir)/.dir $(Echo) Linking $(BuildMode) $(SharedLibKindMessage) \ $(notdir $@) + # @LOCALMOD: the EXTRA_LIBS hack is necessary for LLVMgold.so + # c.f. llvm/tools/gold/Makefile $(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO) \ - $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS) + $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS) $(EXTRA_LIBS) else $(LibName.SO): $(ObjectsO) $(SharedLibDir)/.dir $(Echo) Linking $(BuildMode) Shared Library $(notdir $@) diff --git a/OWNERS b/OWNERS new file mode 100644 index 0000000000..3f2cc43ac7 --- /dev/null +++ b/OWNERS @@ -0,0 +1,7 @@ +dschuff@chromium.org +eliben@chromium.org +jvoung@chromium.org +mseaborn@chromium.org +robertm@chromium.org +sehr@chromium.org + diff --git a/PRESUBMIT.py b/PRESUBMIT.py new file mode 100644 index 0000000000..d81168ea09 --- /dev/null +++ b/PRESUBMIT.py @@ -0,0 +1,66 @@ +# Copyright (c) 2012 The Native Client Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# Documentation on PRESUBMIT.py can be found at: +# http://www.chromium.org/developers/how-tos/depottools/presubmit-scripts + +EXCLUDE_PROJECT_CHECKS_DIRS = [ '.' ] + +import subprocess +def CheckGitBranch(): + p = subprocess.Popen("git branch -vv", shell=True, + stdout=subprocess.PIPE) + output, _ = p.communicate() + + lines = output.split('\n') + for line in lines: + # output format for checked-out branch should be + # * branchname hash [TrackedBranchName ... + toks = line.split() + if '*' not in toks[0]: + continue + if not 'origin/master' in toks[3]: + warning = 'Warning: your current branch:\n' + line + warning += '\nis not tracking origin/master. git cl push may silently ' + warning += 'fail to push your change. 
To fix this, do\n' + warning += 'git branch -u origin/master' + return warning + return None + print 'Warning: presubmit check could not determine local git branch' + return None + +def _CommonChecks(input_api, output_api): + """Checks for both upload and commit.""" + results = [] + results.extend(input_api.canned_checks.PanProjectChecks( + input_api, output_api, project_name='Native Client', + excluded_paths=tuple(EXCLUDE_PROJECT_CHECKS_DIRS))) + branch_warning = CheckGitBranch() + if branch_warning: + results.append(output_api.PresubmitPromptWarning(branch_warning)) + return results + +def CheckChangeOnUpload(input_api, output_api): + """Verifies all changes in all files. + Args: + input_api: the limited set of input modules allowed in presubmit. + output_api: the limited set of output modules allowed in presubmit. + """ + report = [] + report.extend(_CommonChecks(input_api, output_api)) + return report + +def CheckChangeOnCommit(input_api, output_api): + """Verifies all changes in all files and verifies that the + tree is open and can accept a commit. + Args: + input_api: the limited set of input modules allowed in presubmit. + output_api: the limited set of output modules allowed in presubmit. + """ + report = [] + report.extend(CheckChangeOnUpload(input_api, output_api)) + return report + +def GetPreferredTrySlaves(project, change): + return [] diff --git a/autoconf/config.sub b/autoconf/config.sub index a8d85281f9..0e013633dd 100755 --- a/autoconf/config.sub +++ b/autoconf/config.sub @@ -239,6 +239,10 @@ case $os in basic_machine=m68k-atari os=-mint ;; + -nacl*) + basic_machine=i686-pc + os=-nacl + ;; esac # Decode aliases for certain CPU-COMPANY combinations. @@ -348,6 +352,14 @@ case $basic_machine in i*86 | x86_64) basic_machine=$basic_machine-pc ;; + nacl64*) + basic_machine=x86_64-pc + os=-nacl + ;; + nacl*) + basic_machine=i686-pc + os=-nacl + ;; # Object if more than one company name word. *-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 @@ -1366,6 +1378,9 @@ case $os in ;; esac ;; + -nacl*) + os=-nacl + ;; -nto-qnx*) ;; -nto*) diff --git a/autoconf/configure.ac b/autoconf/configure.ac index a5caac971b..cd0a981c6f 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -297,6 +297,11 @@ AC_CACHE_CHECK([type of operating system we're going to host on], llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="Freestanding" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; *) llvm_cv_link_all_option="" llvm_cv_no_link_all_option="" diff --git a/codereview.settings b/codereview.settings new file mode 100644 index 0000000000..1940586a7f --- /dev/null +++ b/codereview.settings @@ -0,0 +1,10 @@ +# This file is used by gcl to get repository specific information. 
+CODE_REVIEW_SERVER: codereview.chromium.org +CC_LIST: native-client-reviews@googlegroups.com +VIEW_VC: https://gerrit.chromium.org/gerrit/gitweb?p=native_client/pnacl-llvm.git;a=commit;h= +STATUS: http://nativeclient-status.appspot.com/status +TRY_ON_UPLOAD: False +TRYSERVER_PROJECT: nacl +TRYSERVER_SVN_URL: svn://svn.chromium.org/chrome-try/try-nacl +PUSH_URL_CONFIG: url.ssh://gerrit.chromium.org.pushinsteadof +ORIGIN_URL_CONFIG: http://git.chromium.org @@ -3899,6 +3899,11 @@ else llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="Freestanding" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Freestanding" + llvm_cv_platform_type="Unix" ;; *) llvm_cv_link_all_option="" llvm_cv_no_link_all_option="" diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 7743ff06a0..935732a032 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -2868,10 +2868,11 @@ All globals of this sort should have a section specified as The '``llvm.used``' Global Variable ----------------------------------- -The ``@llvm.used`` global is an array which has :ref:`appending linkage -<linkage_appending>`. This array contains a list of pointers to global -variables, functions and aliases which may optionally have a pointer cast formed -of bitcast or getelementptr. For example, a legal use of it is: +The ``@llvm.used`` global is an array with i8\* element type which has +:ref:`appending linkage <linkage_appending>`. This array contains a list of +pointers to global variables and functions which may optionally have a +pointer cast formed of bitcast or getelementptr. For example, a legal +use of it is: .. code-block:: llvm @@ -2883,13 +2884,13 @@ of bitcast or getelementptr. For example, a legal use of it is: i8* bitcast (i32* @Y to i8*) ], section "llvm.metadata" -If a symbol appears in the ``@llvm.used`` list, then the compiler, assembler, -and linker are required to treat the symbol as if there is a reference to the -symbol that it cannot see. For example, if a variable has internal linkage and -no references other than that from the ``@llvm.used`` list, it cannot be -deleted. This is commonly used to represent references from inline asms and -other things the compiler cannot "see", and corresponds to -"``attribute((used))``" in GNU C. +If a global variable appears in the ``@llvm.used`` list, then the +compiler, assembler, and linker are required to treat the symbol as if +there is a reference to the global that it cannot see. For example, if a +variable has internal linkage and no references other than that from the +``@llvm.used`` list, it cannot be deleted. This is commonly used to +represent references from inline asms and other things the compiler +cannot "see", and corresponds to "``attribute((used))``" in GNU C. 
On some targets, the code generator must emit a directive to the assembler or object file to prevent the assembler and linker from @@ -4007,7 +4008,7 @@ Example: <result> = lshr i32 4, 1 ; yields {i32}:result = 2 <result> = lshr i32 4, 2 ; yields {i32}:result = 1 <result> = lshr i8 4, 3 ; yields {i8}:result = 0 - <result> = lshr i8 -2, 1 ; yields {i8}:result = 0x7FFFFFFF + <result> = lshr i8 -2, 1 ; yields {i8}:result = 0x7FFFFFFF <result> = lshr i32 1, 32 ; undefined <result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2> ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1> @@ -8613,3 +8614,4 @@ Semantics: This intrinsic does nothing, and it's removed by optimizers and ignored by codegen. +S
\ No newline at end of file diff --git a/docs/PNaClLangRef.rst b/docs/PNaClLangRef.rst new file mode 100644 index 0000000000..4f322a3eff --- /dev/null +++ b/docs/PNaClLangRef.rst @@ -0,0 +1,367 @@ +============================== +PNaCl Bitcode Reference Manual +============================== + +.. contents:: + :local: + :depth: 3 + +Introduction +============ + +This document is a reference manual for the PNaCl bitcode format. It describes +the bitcode on a *semantic* level; the physical encoding level will be described +elsewhere. For the purpose of this document, the textual form of LLVM IR is +used to describe instructions and other bitcode constructs. + +Since the PNaCl bitcode is based to a large extent on LLVM IR, many sections +in this document point to a relevant section of the LLVM language reference +manual. Only the changes, restrictions and variations specific to PNaCl are +described - full semantic descriptions are not duplicated from the LLVM +reference manual. + +*[TODO(eliben): this may gradually change in the future, as we move more +contents into this document; also, the physical encoding will also be described +here in the future, once we know what it's going to be]* + +High Level Structure +==================== + +A PNaCl portable executable ("pexe" in short) is a single LLVM IR module. + +.. _linkagetypes: + +Linkage Types +------------- + +`LLVM LangRef: Linkage Types <LangRef.html#linkage>`_ + +The linkage types supported by PNaCl bitcode are ``internal`` and ``external``. +A single function in the pexe, named ``_start``, has the linkage type +``external``. All the other functions and globals have the linkage type +``internal``. + +Calling Conventions +------------------- + +`LLVM LangRef: Calling Conventions <LangRef.html#callingconv>`_ + +The only calling convention supported by PNaCl bitcode is ``ccc`` - the C +calling convention. + +Visibility Styles +----------------- + +`LLVM LangRef: Visibility Styles <LangRef.html#visibilitystyles>`_ + +PNaCl bitcode does not support visibility styles. + +Global Variables +---------------- + +`LLVM LangRef: Global Variables <LangRef.html#globalvars>`_ + +Restrictions on global variables: + +* PNaCl bitcode does not support TLS models. +* Restrictions on :ref:`linkage types <linkagetypes>`. + +TODO: describe other restrictions on global variables + +Functions +--------- + +`LLVM LangRef: Functions <LangRef.html#functionstructure>`_ + +The restrictions on :ref:`linkage types <linkagetypes>`, calling conventions +and visibility styles apply to functions. In addition, the following are +not supported for functions: + +* Function attributes (either for the the function itself, its parameters or its + return type). +* Section specification. +* Garbage collector name. +* Functions with a variable number of arguments (*vararg*). + +Aliases +------- + +`LLVM LangRef: Aliases <LangRef.html#langref_aliases>`_ + +PNaCl bitcode does not support aliases. + +Named Metadata +-------------- + +`LLVM LangRef: Named Metadata <LangRef.html#namedmetadatastructure>`_ + +While PNaCl bitcode has provisions for debugging metadata, it is not considered +part of the stable ABI. It exists for tool support and should not appear in +distributed pexes. + +Other kinds of LLVM metadata are not supported. + +Module-Level Inline Assembly +---------------------------- + +`LLVM LangRef: Module-Level Inline Assembly <LangRef.html#moduleasm>`_ + +PNaCl bitcode does not support inline assembly. 
+ +Volatile Memory Accesses +------------------------ + +`LLVM LangRef: Volatile Memory Accesses <LangRef.html#volatile>`_ + +TODO: are we going to promote volatile to atomic? + +Memory Model for Concurrent Operations +-------------------------------------- + +`LLVM LangRef: Memory Model for Concurrent Operations <LangRef.html#memmodel>`_ + +TODO. + +Atomic Memory Ordering Constraints +---------------------------------- + +`LLVM LangRef: Atomic Memory Ordering Constraints <LangRef.html#ordering>`_ + +TODO. + +Fast-Math Flags +--------------- + +`LLVM LangRef: Fast-Math Flags <LangRef.html#fastmath>`_ + +Fast-math mode is not currently supported by the PNaCl bitcode. + +Type System +=========== + +`LLVM LangRef: Type System <LangRef.html#typesystem>`_ + +The LLVM types allowed in PNaCl bitcode are restricted, as follows: + +Scalar types +------------ + +* The only scalar types allowed are integer, float, double and void. + + * The only integer sizes allowed are i1, i8, i16, i32 and i64. + * The only integer sizes allowed for function arguments are i32 and i64. + +Arrays and structs are only allowed in TODO. + +.. _pointertypes: + +Pointer types +------------- + +Pointer types are allowed with the following restrictions: + +* Pointers to valid PNaCl bitcode scalar types, as specified above. +* Pointers to functions (but not intrinsics). +* The address space for all pointers must be 0. + +A pointer is *inherent* when it represents the return value of an ``alloca`` +instruction, or is an address of a global value. + +A pointer is *normalized* if it's either: + +* *inherent* +* Is the return value of a ``bitcast`` instruction. +* Is the return value of a ``inttoptr`` instruction. + +Note: the size of a pointer in PNaCl is 32 bits. + +Global Variable and Function Addresses +-------------------------------------- + +Undefined Values +---------------- + +`LLVM LangRef: Undefined Values <LangRef.html#undefvalues>`_ + +Poison Values +------------- + +`LLVM LangRef: Poison Values <LangRef.html#poisonvalues>`_ + +PNaCl bitcode does not support poison values; consequently, the ``nsw`` and +``nuw`` are not supported. + +Constant Expressions +-------------------- + +`LLVM LangRef: Constant Expressions <LangRef.html#constantexprs>`_ + +In the general sense, PNaCl bitcode does not support constant expressions. +There is a single, restricted, use case permitted in global initializers, +where the ``add`` and ``ptrtoint`` constant expressions are allowed. + +Other Values +============ + +Metadata Nodes and Metadata Strings +----------------------------------- + +`LLVM LangRef: Metadata Nodes and Metadata Strings <LangRef.html#metadata>`_ + +While PNaCl bitcode has provisions for debugging metadata, it is not considered +part of the stable ABI. It exists for tool support and should not appear in +distributed pexes. + +Other kinds of LLVM metadata are not supported. + +Intrinsic Global Variables +========================== + +`LLVM LangRef: Intrinsic Global Variables <LangRef.html#intrinsicglobalvariables>`_ + +PNaCl bitcode does not support intrinsic global variables. + +Instruction Reference +===================== + +This is a list of LLVM instructions supported by PNaCl bitcode. Where +applicable, PNaCl-specific restrictions are provided. + +Only the LLVM instructions listed here are supported by PNaCl bitcode. + +* ``ret`` +* ``br`` +* ``switch`` +* ``add`` + + The ``nsw`` and ``nuw`` modes are not supported. + +* ``sub`` + + The ``nsw`` and ``nuw`` modes are not supported. 
+ +* ``mul`` + + The ``nsw`` and ``nuw`` modes are not supported. + +* ``shl`` + + The ``nsw`` and ``nuw`` modes are not supported. + +* ``udiv``, ``sdiv``, ``urem``, ``srem`` + + Integer division is guaranteed to trap in PNaCl bitcode. This trap can + not be intercepted. + +* ``lshr`` +* ``ashr`` +* ``and`` +* ``or`` +* ``xor`` +* ``fadd`` +* ``fsub`` +* ``fmul`` +* ``fdiv`` +* ``frem`` +* ``alloca`` + + The only allowed type for ``alloca`` instructions in PNaCl bitcode + is i8. For example: + +.. code-block:: llvm + + %buf = alloca i8, i32 8, align 4 + +* ``load``, ``store`` + + The pointer argument of these instructions must be a *normalized* pointer + (see :ref:`pointer types <pointertypes>`). + +* ``fence`` +* ``cmpxchg``, ``atomicrmw`` + + The pointer argument of these instructions must be a *normalized* pointer + (see :ref:`pointer types <pointertypes>`). + + TODO(jfb): this may change + +* ``trunc`` +* ``zext`` +* ``sext`` +* ``fptrunc`` +* ``fpext`` +* ``fptoui`` +* ``fptosi`` +* ``uitofp`` +* ``sitofp`` + +* ``ptrtoint`` + + The pointer argument of a ``ptrtoint`` instruction must be a *normalized* + pointer (see :ref:`pointer types <pointertypes>`) and the integer argument + must be an i32. + +* ``inttoptr`` + + The integer argument of a ``inttoptr`` instruction must be an i32. + +* ``bitcast`` + + The pointer argument of a ``bitcast`` instruction must be a *inherent* pointer + (see :ref:`pointer types <pointertypes>`). + +* ``icmp`` +* ``fcmp`` +* ``phi`` +* ``select`` +* ``call`` + +Intrinsic Functions +=================== + +`LLVM LangRef: Intrinsic Functions <LangRef.html#intrinsics>`_ + +The only intrinsics supported by PNaCl bitcode are the following. + +TODO(jfb): atomics + +* ``llvm.memcpy`` +* ``llvm.memmove`` +* ``llvm.memset`` + + These intrinsics are only supported with an i32 ``len`` argument. + +* ``llvm.bswap`` + + The overloaded ``llvm.bswap`` intrinsic is only supported with the following + argument types: i16, i32, i64 (the types supported by C-style GCC builtins). + +* ``llvm.ctlz`` +* ``llvm.cttz`` +* ``llvm.ctpop`` + + The overloaded llvm.ctlz, llvm.cttz, and llvm.ctpop intrinsics are only + supported with the i32 and i64 argument types (the types supported by + C-style GCC builtins). + +* ``llvm.sqrt`` + + The overloaded ``llvm.sqrt`` intrinsic is only supported for float + and double arguments types. Unlike the standard LLVM intrinsic, + PNaCl guarantees that llvm.sqrt returns a QNaN for values less than -0.0. 
+ +* ``llvm.stacksave`` +* ``llvm.stackrestore`` +* ``llvm.trap`` +* ``llvm.nacl.read.tp`` + + TODO: describe + +* ``llvm.nacl.longjmp`` + + TODO: describe + +* ``llvm.nacl.setjmp`` + + TODO: describe + diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index 40110fddfc..ce3546bd49 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -60,6 +60,13 @@ typedef enum { LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC = 2 } lto_codegen_model; +/* @LOCALMOD-BEGIN */ +typedef enum { + LTO_OUTPUT_FORMAT_OBJECT = 0, /* object file */ + LTO_OUTPUT_FORMAT_SHARED = 1, /* shared library */ + LTO_OUTPUT_FORMAT_EXEC = 2 /* executable */ +} lto_output_format; +/* @LOCALMOD-END */ /** opaque reference to a loaded object module */ typedef struct LTOModule* lto_module_t; @@ -71,6 +78,17 @@ typedef struct LTOCodeGenerator* lto_code_gen_t; extern "C" { #endif + +/* @LOCALMOD-BEGIN */ + +/* Add a command-line option */ +void lto_add_command_line_option(const char* opt); + +/* Parse command line options */ +void lto_parse_command_line_options(); + +/* @LOCALMOD-END */ + /** * Returns a printable string. */ @@ -165,6 +183,36 @@ lto_module_get_target_triple(lto_module_t mod); extern void lto_module_set_target_triple(lto_module_t mod, const char *triple); +/* @LOCALMOD-BEGIN */ + +/** + * Get the module format for this module + */ +extern lto_output_format +lto_module_get_output_format(lto_module_t mod); + +/** + * Get the module soname + */ +extern const char* +lto_module_get_soname(lto_module_t mod); + + +/** + * Get the i'th library dependency. + * Returns NULL if i >= lto_module_get_num_library_deps() + */ +extern const char* +lto_module_get_library_dep(lto_module_t mod, unsigned int i); + + +/** + * Return the number of library dependencies of this module. + */ +extern unsigned int +lto_module_get_num_library_deps(lto_module_t mod); + +/* @LOCALMOD-END */ /** * Returns the number of symbols in the object module. @@ -211,7 +259,26 @@ lto_codegen_dispose(lto_code_gen_t); extern bool lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod); +/* @LOCALMOD-BEGIN */ +/** + * Add an object module to the set of modules for which code will be generated. + * This does not merge the module immediately, unlike lto_codegen_add_module. + * It will hold onto the module until the user calls + * lto_codegen_link_gathered_modules_and_dispose(). The lto_module_t + * should now by owned by the lto_code_gen_t, and will be freed when + * the link is done. + */ +extern void +lto_codegen_gather_module_for_link(lto_code_gen_t cg, lto_module_t mod); +/** + * Merges modules that are part of the set of modules gathered by + * lto_codegen_gather_module_for_link(), and the also destroys the modules + * as lto_module_dispose() would. + */ +extern bool +lto_codegen_link_gathered_modules_and_dispose(lto_code_gen_t cg); +/* @LOCALMOD-END*/ /** * Sets if debug info should be generated. 
@@ -258,6 +325,56 @@ lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args, extern void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol); +/* @LOCALMOD-BEGIN */ + +/** + * Sets the module type for the merged module + */ +extern void +lto_codegen_set_merged_module_output_format(lto_code_gen_t cg, + lto_output_format format); + +/** + * Sets the SOName for the merged module + */ +extern void +lto_codegen_set_merged_module_soname(lto_code_gen_t cg, + const char *soname); + +/** + * Add a library dependency to the merged module + */ +extern void +lto_codegen_add_merged_module_library_dep(lto_code_gen_t cg, + const char *lib); + +/** + * Wrap a symbol in the merged module. + */ +extern void +lto_codegen_wrap_symbol_in_merged_module(lto_code_gen_t cg, + const char *sym); + + +/** + * Set version of a defined symbol in the merged module + */ +extern const char * +lto_codegen_set_symbol_def_version(lto_code_gen_t cg, + const char *sym, + const char *version, + bool is_default); + + +/** + * Set version of an undefined symbol in the merged module + */ +extern const char * +lto_codegen_set_symbol_needed(lto_code_gen_t cg, + const char *sym, + const char *version, + const char *dynfile); +/* @LOCALMOD-END */ /** * Writes a new object file at the specified path that contains the * merged contents of all modules added so far. diff --git a/include/llvm/Analysis/NaCl.h b/include/llvm/Analysis/NaCl.h new file mode 100644 index 0000000000..f174e72608 --- /dev/null +++ b/include/llvm/Analysis/NaCl.h @@ -0,0 +1,71 @@ +//===-- NaCl.h - NaCl Analysis ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_NACL_H +#define LLVM_ANALYSIS_NACL_H + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <string> + +namespace llvm { + +class FunctionPass; +class ModulePass; +extern cl::opt<bool> PNaClABIAllowDebugMetadata; + +class PNaClABIErrorReporter { + public: + PNaClABIErrorReporter() : ErrorCount(0), Errors(ErrorString), + UseFatalErrors(true) {} + // Return the number of verification errors from the last run. + int getErrorCount() { return ErrorCount; } + // Print the error messages to O + void printErrors(llvm::raw_ostream &O) { + Errors.flush(); + O << ErrorString; + } + // Increments the error count and returns an ostream to which the error + // message can be streamed. + raw_ostream &addError() { + ErrorCount++; + return Errors; + } + // Reset the error count and error messages. 
+ void reset() { + ErrorCount = 0; + Errors.flush(); + ErrorString.clear(); + } + void setNonFatal() { + UseFatalErrors = false; + } + void checkForFatalErrors() { + if (UseFatalErrors && ErrorCount != 0) { + printErrors(errs()); + report_fatal_error("PNaCl ABI verification failed"); + } + } + private: + int ErrorCount; + std::string ErrorString; + raw_string_ostream Errors; + bool UseFatalErrors; +}; + +FunctionPass *createPNaClABIVerifyFunctionsPass( + PNaClABIErrorReporter *Reporter); +ModulePass *createPNaClABIVerifyModulePass(PNaClABIErrorReporter *Reporter, + bool StreamingMode = false); + +} + + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitCodes.h b/include/llvm/Bitcode/NaCl/NaClBitCodes.h new file mode 100644 index 0000000000..bb52d0e20e --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitCodes.h @@ -0,0 +1,257 @@ +//===- NaClBitCodes.h - Enum values for the bitcode format ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header Bitcode enum values. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITCODES_H +#define LLVM_BITCODE_NACL_NACLBITCODES_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> + +namespace llvm { +namespace naclbitc { + enum StandardWidths { + BlockIDWidth = 8, // We use VBR-8 for block IDs. + CodeLenWidth = 4, // Codelen are VBR-4. + BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block. + }; + + // The standard abbrev namespace always has a way to exit a block, enter a + // nested block, define abbrevs, and define an unabbreviated record. + enum FixedAbbrevIDs { + END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode. + ENTER_SUBBLOCK = 1, + + /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists + /// of a vbr5 for # operand infos. Each operand info is emitted with a + /// single bit to indicate if it is a literal encoding. If so, the value is + /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed + /// by the info value as a vbr5 if needed. + DEFINE_ABBREV = 2, + + // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by + // a vbr6 for the # operands, followed by vbr6's for each operand. + UNABBREV_RECORD = 3, + + // This is not a code, this is a marker for the first abbrev assignment. + // In addition, we assume up to two additional enumerated constants are + // added for each extension. These constants are: + // + // PREFIX_MAX_FIXED_ABBREV + // PREFIX_MAX_ABBREV + // + // PREFIX_MAX_ABBREV defines the maximal enumeration value used for + // the code selector of a block. If Both PREFIX_MAX_FIXED_ABBREV + // and PREFIX_MAX_ABBREV is defined, then PREFIX_MAX_FIXED_ABBREV + // defines the last code selector of the block that must be read using + // a single read (i.e. a FIXED read, or the first chunk of a VBR read. + FIRST_APPLICATION_ABBREV = 4, + // Defines default values for code length, if no additional selectors + // are added. 
+ DEFAULT_MAX_ABBREV = FIRST_APPLICATION_ABBREV-1 + }; + + /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO + /// block, which contains metadata about other blocks in the file. + enum StandardBlockIDs { + /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example, + /// standard abbrevs that should be available to all blocks of a specified + /// ID. + BLOCKINFO_BLOCK_ID = 0, + + // Block IDs 1-7 are reserved for future expansion. + FIRST_APPLICATION_BLOCKID = 8 + }; + + /// BlockInfoCodes - The blockinfo block contains metadata about user-defined + /// blocks. + enum BlockInfoCodes { + // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd + // block, instead of the BlockInfo block. + + BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] + BLOCKINFO_CODE_BLOCKNAME = 2, // BLOCKNAME: [name] + BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: + // [id, name] + }; + +} // End naclbitc namespace + +/// NaClBitCodeAbbrevOp - This describes one or more operands in an abbreviation. +/// This is actually a union of two different things: +/// 1. It could be a literal integer value ("the operand is always 17"). +/// 2. It could be an encoding specification ("this operand encoded like so"). +/// +class NaClBitCodeAbbrevOp { + uint64_t Val; // A literal value or data for an encoding. + bool IsLiteral : 1; // Indicate whether this is a literal value or not. + unsigned Enc : 3; // The encoding to use. +public: + enum Encoding { + Fixed = 1, // A fixed width field, Val specifies number of bits. + VBR = 2, // A VBR field where Val specifies the width of each chunk. + Array = 3, // A sequence of fields, next field species elt encoding. + Char6 = 4, // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + Blob = 5 // 32-bit aligned array of 8-bit characters. + }; + + explicit NaClBitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} + explicit NaClBitCodeAbbrevOp(Encoding E, uint64_t Data = 0) + : Val(Data), IsLiteral(false), Enc(E) {} + + bool isLiteral() const { return IsLiteral; } + bool isEncoding() const { return !IsLiteral; } + + // Accessors for literals. + uint64_t getLiteralValue() const { assert(isLiteral()); return Val; } + + // Accessors for encoding info. + Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; } + uint64_t getEncodingData() const { + assert(isEncoding() && hasEncodingData()); + return Val; + } + + bool hasEncodingData() const { return hasEncodingData(getEncoding()); } + static bool hasEncodingData(Encoding E) { + switch (E) { + case Fixed: + case VBR: + return true; + case Array: + case Char6: + case Blob: + return false; + } + llvm_unreachable("Invalid encoding"); + } + + /// isChar6 - Return true if this character is legal in the Char6 encoding. + static bool isChar6(char C) { + if (C >= 'a' && C <= 'z') return true; + if (C >= 'A' && C <= 'Z') return true; + if (C >= '0' && C <= '9') return true; + if (C == '.' 
|| C == '_') return true; + return false; + } + static unsigned EncodeChar6(char C) { + if (C >= 'a' && C <= 'z') return C-'a'; + if (C >= 'A' && C <= 'Z') return C-'A'+26; + if (C >= '0' && C <= '9') return C-'0'+26+26; + if (C == '.') return 62; + if (C == '_') return 63; + llvm_unreachable("Not a value Char6 character!"); + } + + static char DecodeChar6(unsigned V) { + assert((V & ~63) == 0 && "Not a Char6 encoded character!"); + if (V < 26) return V+'a'; + if (V < 26+26) return V-26+'A'; + if (V < 26+26+10) return V-26-26+'0'; + if (V == 62) return '.'; + if (V == 63) return '_'; + llvm_unreachable("Not a value Char6 character!"); + } + +}; + +template <> struct isPodLike<NaClBitCodeAbbrevOp> { + static const bool value=true; +}; + +/// NaClBitCodeAbbrev - This class represents an abbreviation record. An +/// abbreviation allows a complex record that has redundancy to be stored in a +/// specialized format instead of the fully-general, fully-vbr, format. +class NaClBitCodeAbbrev { + SmallVector<NaClBitCodeAbbrevOp, 32> OperandList; + unsigned char RefCount; // Number of things using this. + ~NaClBitCodeAbbrev() {} +public: + NaClBitCodeAbbrev() : RefCount(1) {} + + void addRef() { ++RefCount; } + void dropRef() { if (--RefCount == 0) delete this; } + + unsigned getNumOperandInfos() const { + return static_cast<unsigned>(OperandList.size()); + } + const NaClBitCodeAbbrevOp &getOperandInfo(unsigned N) const { + return OperandList[N]; + } + + void Add(const NaClBitCodeAbbrevOp &OpInfo) { + OperandList.push_back(OpInfo); + } +}; + +/// \brief Returns number of bits needed to encode +/// value for dense FIXED encoding. +inline unsigned NaClBitsNeededForValue(unsigned Value) { + // Note: Need to handle case where Value=0xFFFFFFFF as special case, + // since we can't add 1 to it. + if (Value >= 0x80000000) return 32; + return Log2_32_Ceil(Value+1); +} + +/// \brief Encode a signed value by moving the sign to the LSB for dense +/// VBR encoding. +inline uint64_t NaClEncodeSignRotatedValue(int64_t V) { + return (V >= 0) ? (V << 1) : ((-V << 1) | 1); +} + +/// \brief Decode a signed value stored with the sign bit in +/// the LSB for dense VBR encoding. +inline uint64_t NaClDecodeSignRotatedValue(uint64_t V) { + if ((V & 1) == 0) + return V >> 1; + if (V != 1) + return -(V >> 1); + // There is no such thing as -0 with integers. "-0" really means MININT. + return 1ULL << 63; +} + +/// \brief This class determines whether a FIXED or VBR +/// abbreviation should be used for the selector, and the number of bits +/// needed to capture such selectors. +class NaClBitcodeSelectorAbbrev { + +public: + // If true, use a FIXED abbreviation. Otherwise, use a VBR abbreviation. + bool IsFixed; + // Number of bits needed for selector. + unsigned NumBits; + + // Creates a selector range for the given values. + NaClBitcodeSelectorAbbrev(bool IF, unsigned NB) + : IsFixed(IF), NumBits(NB) {} + + // Creates a selector range when no abbreviations are defined. + NaClBitcodeSelectorAbbrev() + : IsFixed(true), + NumBits(NaClBitsNeededForValue(naclbitc::DEFAULT_MAX_ABBREV)) {} + + // Creates a selector range to handle fixed abbrevations up to + // the specified value. 
+ explicit NaClBitcodeSelectorAbbrev(unsigned MaxAbbrev) + : IsFixed(true), + NumBits(NaClBitsNeededForValue(MaxAbbrev)) {} +}; +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h new file mode 100644 index 0000000000..8febf95564 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h @@ -0,0 +1,219 @@ +//===-- llvm/Bitcode/NaCl/NaClBitcodeHeader.h - ----------------*- C++ -*-===// +// NaCl Bitcode header reader. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write NaCl bitcode wire format +// file headers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITCODEHEADER_H +#define LLVM_BITCODE_NACL_NACLBITCODEHEADER_H + +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include <string> +#include <vector> + +namespace llvm { +class StreamableMemoryObject; + +// Class representing a variable-size metadata field in the bitcode header. +// Also contains the list of known (typed) Tag IDs. +// +// The serialized format has 2 fixed subfields (ID:type and data length) and the +// variable-length data subfield +class NaClBitcodeHeaderField { + NaClBitcodeHeaderField(const NaClBitcodeHeaderField &) LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeHeaderField &)LLVM_DELETED_FUNCTION; + +public: + // Defines the ID associated with the value. Valid values are in + // {0x0, ..., 0xFFF} + typedef enum { + kInvalid = 0, // KUnknownType. + kPNaClVersion = 1 // kUint32. + } Tag; + // Defines the type of value. + typedef enum { + kBufferType, // Buffer of form uint8_t[len]. + kUInt32Type + } FieldType; + // Defines the number of bytes in a (32-bit) word. + static const int WordSize = 4; + + // Defines the encoding of the fixed fields {i.e. ID:type and data length). + typedef uint16_t FixedSubfield; + + // Create an invalid header field. + NaClBitcodeHeaderField(); + + // Create a header field with an uint32_t value. + NaClBitcodeHeaderField(Tag MyID, uint32_t value); + + // Create a header field for the given data. + NaClBitcodeHeaderField(Tag MyID, size_t MyLen, uint8_t *MyData); + + virtual ~NaClBitcodeHeaderField() { + if (Data) + delete[] Data; + } + + /// \brief Number of bytes used to represent header field. + size_t GetTotalSize() const { + // Round up to 4 byte alignment + return (kTagLenSize + Len + (WordSize - 1)) & ~(WordSize - 1); + } + + /// \brief Write field into Buf[BufLen]. + bool Write(uint8_t *Buf, size_t BufLen) const; + + /// \brief Read field form Buf[BufLen]. + bool Read(const uint8_t *Buf, size_t BufLen); + + /// \brief Returns string describing field. + std::string Contents() const; + + /// \brief Get the data size from a serialized field to allow allocation. + static size_t GetDataSizeFromSerialized(const uint8_t *Buf) { + FixedSubfield Length; + ReadFixedSubfield(&Length, Buf + sizeof(FixedSubfield)); + return Length; + } + + /// \brief Return the ID of the field. + Tag GetID() const { return ID; } + + FieldType GetType() const { return FType; } + + /// \brief Return the length of the data (in bytes). + size_t GetLen() const { return Len; } + + /// \brief Return the data. Data is array getData()[getLen()]. 
+ const uint8_t *GetData() const { return Data; } + + /// \brief Returns the uint32_t value stored. Requires that + /// getType() == kUint32Type + uint32_t GetUInt32Value() const; + +private: + // Convert ID:Type into a fixed subfield + FixedSubfield EncodeTypedID() const { return (ID << 4) | FType; } + // Extract out ID and Type from a fixed subfield. + void DecodeTypedID(FixedSubfield Subfield, Tag &ID, FieldType &FType) { + ID = static_cast<Tag>(Subfield >> 4); + FType = static_cast<FieldType>(Subfield & 0xF); + } + // Combined size of the fixed subfields + const static size_t kTagLenSize = 2 * sizeof(FixedSubfield); + static void WriteFixedSubfield(FixedSubfield Value, uint8_t *Buf) { + Buf[0] = Value & 0xFF; + Buf[1] = (Value >> 8) & 0xFF; + } + static void ReadFixedSubfield(FixedSubfield *Value, const uint8_t *Buf) { + *Value = Buf[0] | Buf[1] << 8; + } + Tag ID; + FieldType FType; + size_t Len; + uint8_t *Data; +}; + +/// \brief Class holding parsed header fields in PNaCl bitcode file. +class NaClBitcodeHeader { + // The set of parsed header fields. The header takes ownership of + // all fields in this vector. + std::vector<NaClBitcodeHeaderField *> Fields; + // The number of bytes in the PNaCl header. + size_t HeaderSize; + // String defining why it is unsupported (if unsupported). + std::string UnsupportedMessage; + // Flag defining if header is supported. + bool IsSupportedFlag; + // Flag defining if the corresponding bitcode file is readable. + bool IsReadableFlag; + // Defines the PNaCl version defined by the header file. + uint32_t PNaClVersion; + +public: + static const int WordSize = NaClBitcodeHeaderField::WordSize; + + NaClBitcodeHeader(); + ~NaClBitcodeHeader(); + + /// \brief Installs the fields of the header, defining if the header + /// is readable and supported. + void InstallFields(); + + /// \brief Read the PNaCl bitcode header, The format of the header is: + /// + /// 1) 'PEXE' - The four character sequence defining the magic number. + /// 2) uint_16 num_fields - The number of NaClBitcodeHeaderField's. + /// 3) uint_16 num_bytes - The number of bytes to hold fields in + /// the header. + /// 4) NaClBitcodeHeaderField f1 - The first bitcode header field. + /// ... + /// 2 + num_fields) NaClBitcodeHeaderField fn - The last bitcode header + /// field. + /// + /// Returns false if able to read (all of) the bitcode header. + bool Read(const unsigned char *&BufPtr, const unsigned char *&BufEnd); + + // \brief Read the PNaCl bitcode header, recording the fields found + // in the header. Returns false if able to read (all of) the bitcode header. + bool Read(StreamableMemoryObject *Bytes); + + // \brief Returns the number of bytes read to consume the header. + size_t getHeaderSize() { return HeaderSize; } + + /// \brief Returns C string describing why the header describes + /// an unsupported PNaCl Bitcode file. Returns 0 if supported. + const std::string Unsupported() const { return UnsupportedMessage; } + + /// \brief Returns true if supported. That is, it can be run in the + /// browser. + bool IsSupported() const { return IsSupportedFlag; } + + /// \brief Returns true if the bitcode file should be readable. Note + /// that just because it is readable, it doesn't necessarily mean that + /// it is supported. + bool IsReadable() const { return IsReadableFlag; } + + /// \brief Returns number of fields defined. + size_t NumberFields() const { return Fields.size(); } + + /// \brief Returns a pointer to the field with the given ID + /// (0 if no such field). 
+ NaClBitcodeHeaderField *GetTaggedField(NaClBitcodeHeaderField::Tag ID) const; + + /// \brief Returns a pointer to the Nth field in the header + /// (0 if no such field). + NaClBitcodeHeaderField *GetField(size_t index) const; + + /// \brief Returns the PNaClVersion, as defined by the header. + uint32_t GetPNaClVersion() const { return PNaClVersion; } + +private: + // Reads and verifies the first 8 bytes of the header, consisting + // of the magic number 'PEXE', and the value defining the number + // of fields and number of bytes used to hold fields. + // Returns false if successful. + bool ReadPrefix(const unsigned char *BufPtr, const unsigned char *BufEnd, + unsigned &NumFields, unsigned &NumBytes); + + // Reads and verifies the fields in the header. + // Returns false if successful. + bool ReadFields(const unsigned char *BufPtr, const unsigned char *BufEnd, + unsigned NumFields, unsigned NumBytes); + +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h new file mode 100644 index 0000000000..a338bbfe79 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h @@ -0,0 +1,563 @@ +//===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===// +// Low-level bitstream reader interface +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamReader class. This class can be used to +// read an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H +#define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/StreamableMemoryObject.h" +#include <climits> +#include <string> +#include <vector> + +namespace llvm { + + class Deserializer; + +/// NaClBitstreamReader - This class is used to read from a NaCl +/// bitcode wire format stream, maintaining information that is global +/// to decoding the entire file. While a file is being read, multiple +/// cursors can be independently advanced or skipped around within the +/// file. These are represented by the NaClBitstreamCursor class. +class NaClBitstreamReader { +public: + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector<NaClBitCodeAbbrev*> Abbrevs; + std::string Name; + + std::vector<std::pair<unsigned, std::string> > RecordNames; + }; +private: + OwningPtr<StreamableMemoryObject> BitcodeBytes; + + std::vector<BlockInfo> BlockInfoRecords; + + /// IgnoreBlockInfoNames - This is set to true if we don't care + /// about the block/record name information in the BlockInfo + /// block. Only pnacl-bcanalyzer uses this. + bool IgnoreBlockInfoNames; + + /// \brief Holds the offset of the first byte after the header. 
+ size_t InitialAddress; + + NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION; + void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION; +public: + NaClBitstreamReader() : IgnoreBlockInfoNames(true), InitialAddress(0) {} + + NaClBitstreamReader(const unsigned char *Start, const unsigned char *End) { + IgnoreBlockInfoNames = true; + InitialAddress = 0; + init(Start, End); + } + + NaClBitstreamReader(StreamableMemoryObject *Bytes, + size_t MyInitialAddress=0) + : InitialAddress(MyInitialAddress) + { + BitcodeBytes.reset(Bytes); + } + + void init(const unsigned char *Start, const unsigned char *End) { + assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); + BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); + } + + StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } + + ~NaClBitstreamReader() { + // Free the BlockInfoRecords. + while (!BlockInfoRecords.empty()) { + BlockInfo &Info = BlockInfoRecords.back(); + // Free blockinfo abbrev info. + for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); + i != e; ++i) + Info.Abbrevs[i]->dropRef(); + BlockInfoRecords.pop_back(); + } + } + + /// CollectBlockInfoNames - This is called by clients that want block/record + /// name information. + void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } + bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } + + /// \brief Returns the initial address (after the header) of the input stream. + size_t getInitialAddress() const { + return InitialAddress; + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + + /// hasBlockInfoRecords - Return true if we've already read and processed the + /// block info block for this Bitstream. We only process it for the first + /// cursor that walks over it. + bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } + + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + const BlockInfo *getBlockInfo(unsigned BlockID) const { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); + i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return 0; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (const BlockInfo *BI = getBlockInfo(BlockID)) + return *const_cast<BlockInfo*>(BI); + + // Otherwise, add a new record. + BlockInfoRecords.push_back(BlockInfo()); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } +}; + + +/// NaClBitstreamEntry - When advancing through a bitstream cursor, +/// each advance can discover a few different kinds of entries: +/// Error - Malformed bitcode was found. +/// EndBlock - We've reached the end of the current block, (or the end of the +/// file, which is treated like a series of EndBlock records. +/// SubBlock - This is the start of a new subblock of a specific ID. +/// Record - This is a record with a specific AbbrevID. 
+/// +struct NaClBitstreamEntry { + enum { + Error, + EndBlock, + SubBlock, + Record + } Kind; + + unsigned ID; + + static NaClBitstreamEntry getError() { + NaClBitstreamEntry E; E.Kind = Error; return E; + } + static NaClBitstreamEntry getEndBlock() { + NaClBitstreamEntry E; E.Kind = EndBlock; return E; + } + static NaClBitstreamEntry getSubBlock(unsigned ID) { + NaClBitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; + } + static NaClBitstreamEntry getRecord(unsigned AbbrevID) { + NaClBitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; + } +}; + +/// NaClBitstreamCursor - This represents a position within a bitcode +/// file. There may be multiple independent cursors reading within +/// one bitstream, each maintaining their own local state. +/// +/// Unlike iterators, NaClBitstreamCursors are heavy-weight objects +/// that should not be passed by value. +class NaClBitstreamCursor { + friend class Deserializer; + NaClBitstreamReader *BitStream; + size_t NextChar; + + /// CurWord/word_t - This is the current data we have pulled from the stream + /// but have not returned to the client. This is specifically and + /// intentionally defined to follow the word size of the host machine for + /// efficiency. We use word_t in places that are aware of this to make it + /// perfectly explicit what is going on. + typedef uint32_t word_t; + word_t CurWord; + + /// BitsInCurWord - This is the number of bits in CurWord that are valid. This + /// is always from [0...31/63] inclusive (depending on word size). + unsigned BitsInCurWord; + + // CurCodeSize - This is the declared size of code values used for the current + // block, in bits. + NaClBitcodeSelectorAbbrev CurCodeSize; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector<NaClBitCodeAbbrev*> CurAbbrevs; + + struct Block { + NaClBitcodeSelectorAbbrev PrevCodeSize; + std::vector<NaClBitCodeAbbrev*> PrevAbbrevs; + explicit Block() : PrevCodeSize() {} + explicit Block(const NaClBitcodeSelectorAbbrev& PCS) + : PrevCodeSize(PCS) {} + }; + + /// BlockScope - This tracks the codesize of parent blocks. + SmallVector<Block, 8> BlockScope; + +public: + NaClBitstreamCursor() : BitStream(0), NextChar(0) { + } + NaClBitstreamCursor(const NaClBitstreamCursor &RHS) + : BitStream(0), NextChar(0) { + operator=(RHS); + } + + explicit NaClBitstreamCursor(NaClBitstreamReader &R) : BitStream(&R) { + NextChar = R.getInitialAddress(); + CurWord = 0; + BitsInCurWord = 0; + } + + void init(NaClBitstreamReader &R) { + freeState(); + + BitStream = &R; + NextChar = R.getInitialAddress(); + CurWord = 0; + BitsInCurWord = 0; + } + + ~NaClBitstreamCursor() { + freeState(); + } + + void operator=(const NaClBitstreamCursor &RHS); + + void freeState(); + + bool isEndPos(size_t pos) { + return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos)); + } + + bool canSkipToPos(size_t pos) const { + // pos can be skipped to if it is a valid address or one byte past the end. + return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( + static_cast<uint64_t>(pos - 1)); + } + + bool AtEndOfStream() { + return BitsInCurWord == 0 && isEndPos(NextChar); + } + + /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. + unsigned getAbbrevIDWidth() const { return CurCodeSize.NumBits; } + + /// GetCurrentBitNo - Return the bit # of the bit we are reading. 
+ uint64_t GetCurrentBitNo() const { + return NextChar*CHAR_BIT - BitsInCurWord; + } + + NaClBitstreamReader *getBitStreamReader() { + return BitStream; + } + const NaClBitstreamReader *getBitStreamReader() const { + return BitStream; + } + + /// Flags that modify the behavior of advance(). + enum { + /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does + /// not automatically pop the block scope when the end of a block is + /// reached. + AF_DontPopBlockAtEnd = 1, + + /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are + /// returned just like normal records. + AF_DontAutoprocessAbbrevs = 2 + }; + + /// advance - Advance the current bitstream, returning the next entry in the + /// stream. + NaClBitstreamEntry advance(unsigned Flags = 0) { + while (1) { + unsigned Code = ReadCode(); + if (Code == naclbitc::END_BLOCK) { + // Pop the end of the block unless Flags tells us not to. + if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) + return NaClBitstreamEntry::getError(); + return NaClBitstreamEntry::getEndBlock(); + } + + if (Code == naclbitc::ENTER_SUBBLOCK) + return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); + + if (Code == naclbitc::DEFINE_ABBREV && + !(Flags & AF_DontAutoprocessAbbrevs)) { + // We read and accumulate abbrev's, the client can't do anything with + // them anyway. + ReadAbbrevRecord(); + continue; + } + + return NaClBitstreamEntry::getRecord(Code); + } + } + + /// advanceSkippingSubblocks - This is a convenience function for clients that + /// don't expect any subblocks. This just skips over them automatically. + NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { + while (1) { + // If we found a normal entry, return it. + NaClBitstreamEntry Entry = advance(Flags); + if (Entry.Kind != NaClBitstreamEntry::SubBlock) + return Entry; + + // If we found a sub-block, just skip over it and check the next entry. + if (SkipBlock()) + return NaClBitstreamEntry::getError(); + } + } + + /// JumpToBit - Reset the stream to the specified bit number. + void JumpToBit(uint64_t BitNo) { + uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); + unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); + assert(canSkipToPos(ByteNo) && "Invalid location"); + + // Move the cursor to the right word. + NextChar = ByteNo; + BitsInCurWord = 0; + CurWord = 0; + + // Skip over any bits that are already consumed. + if (WordBitNo) { + if (sizeof(word_t) > 4) + Read64(WordBitNo); + else + Read(WordBitNo); + } + } + + uint32_t Read(unsigned NumBits) { + assert(NumBits && NumBits <= 32 && + "Cannot return zero or more than 32 bits!"); + + // If the field is fully contained by CurWord, return it quickly. + if (BitsInCurWord >= NumBits) { + uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); + CurWord >>= NumBits; + BitsInCurWord -= NumBits; + return R; + } + + // If we run out of data, stop at the end of the stream. + if (isEndPos(NextChar)) { + CurWord = 0; + BitsInCurWord = 0; + return 0; + } + + uint32_t R = uint32_t(CurWord); + + // Read the next word from the stream. + uint8_t Array[sizeof(word_t)] = {0}; + + BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), + Array, NULL); + + // Handle big-endian byte-swapping if necessary. + support::detail::packed_endian_specific_integral + <word_t, support::little, support::unaligned> EndianValue; + memcpy(&EndianValue, Array, sizeof(Array)); + + CurWord = EndianValue; + + NextChar += sizeof(word_t); + + // Extract NumBits-BitsInCurWord from what we just read. 
+ unsigned BitsLeft = NumBits-BitsInCurWord; + + // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. + R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) + << BitsInCurWord); + + // BitsLeft bits have just been used up from CurWord. BitsLeft is in the + // range [1..32]/[1..64] so be careful how we shift. + if (BitsLeft != sizeof(word_t)*8) + CurWord >>= BitsLeft; + else + CurWord = 0; + BitsInCurWord = sizeof(word_t)*8-BitsLeft; + return R; + } + + uint64_t Read64(unsigned NumBits) { + if (NumBits <= 32) return Read(NumBits); + + uint64_t V = Read(32); + return V | (uint64_t)Read(NumBits-32) << 32; + } + + uint32_t ReadVBR(unsigned NumBits) { + uint32_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return Piece; + + uint32_t Result = 0; + unsigned NextBit = 0; + while (1) { + Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + Piece = Read(NumBits); + } + } + + // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The + // chunk size of the VBR must still be <= 32 bits though. + uint64_t ReadVBR64(unsigned NumBits) { + uint32_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return uint64_t(Piece); + + uint64_t Result = 0; + unsigned NextBit = 0; + while (1) { + Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + Piece = Read(NumBits); + } + } + +private: + void SkipToFourByteBoundary() { + // If word_t is 64-bits and if we've read less than 32 bits, just dump + // the bits we have up to the next 32-bit boundary. + if (sizeof(word_t) > 4 && + BitsInCurWord >= 32) { + CurWord >>= BitsInCurWord-32; + BitsInCurWord = 32; + return; + } + + BitsInCurWord = 0; + CurWord = 0; + } +public: + + unsigned ReadCode() { + return CurCodeSize.IsFixed + ? Read(CurCodeSize.NumBits) + : ReadVBR(CurCodeSize.NumBits); + } + + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] + + /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for + /// the block. + unsigned ReadSubBlockID() { + return ReadVBR(naclbitc::BlockIDWidth); + } + + /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip + /// over the body of this block. If the block record is malformed, return + /// true. + bool SkipBlock() { + // Read and ignore the codelen value. Since we are skipping this block, we + // don't care what code widths are used inside of it. + ReadVBR(naclbitc::CodeLenWidth); + SkipToFourByteBoundary(); + unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth); + + // Check that the block wasn't partially defined, and that the offset isn't + // bogus. + size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; + if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) + return true; + + JumpToBit(SkipTo); + return false; + } + + /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter + /// the block, and return true if the block has an error. + bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0); + + bool ReadBlockEnd() { + if (BlockScope.empty()) return true; + + // Block tail: + // [END_BLOCK, <align4bytes>] + SkipToFourByteBoundary(); + + popBlockScope(); + return false; + } + +private: + + void popBlockScope() { + CurCodeSize = BlockScope.back().PrevCodeSize; + + // Delete abbrevs from popped scope. 
+ for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); + i != e; ++i) + CurAbbrevs[i]->dropRef(); + + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Processing + //===--------------------------------------------------------------------===// + +private: + void readAbbreviatedLiteral(const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl<uint64_t> &Vals); + void readAbbreviatedField(const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl<uint64_t> &Vals); + void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op); + +public: + + /// getAbbrev - Return the abbreviation for the specified AbbrevId. + const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) { + unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + return CurAbbrevs[AbbrevNo]; + } + + /// skipRecord - Read the current record and discard it. + void skipRecord(unsigned AbbrevID); + + unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, + StringRef *Blob = 0); + + //===--------------------------------------------------------------------===// + // Abbrev Processing + //===--------------------------------------------------------------------===// + void ReadAbbrevRecord(); + + bool ReadBlockInfoBlock(); +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h new file mode 100644 index 0000000000..2237b6e29b --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h @@ -0,0 +1,587 @@ +//===- NaClBitstreamWriter.h - NaCl bitstream writer ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamWriter class. This class can be used to +// write an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITSTREAMWRITER_H +#define LLVM_BITCODE_NACL_NACLBITSTREAMWRITER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitcode/NaCl/NaClBitCodes.h" +#include <vector> + +namespace llvm { + +class NaClBitstreamWriter { + SmallVectorImpl<char> &Out; + + /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. + unsigned CurBit; + + /// CurValue - The current value. Only bits < CurBit are valid. + uint32_t CurValue; + + /// CurCodeSize - This is the declared size of code values used for the + /// current block, in bits. + NaClBitcodeSelectorAbbrev CurCodeSize; + + /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently + /// selected BLOCK ID. + unsigned BlockInfoCurBID; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector<NaClBitCodeAbbrev*> CurAbbrevs; + + struct Block { + NaClBitcodeSelectorAbbrev PrevCodeSize; + unsigned StartSizeWord; + std::vector<NaClBitCodeAbbrev*> PrevAbbrevs; + Block(const NaClBitcodeSelectorAbbrev& PCS, unsigned SSW) + : PrevCodeSize(PCS), StartSizeWord(SSW) {} + }; + + /// BlockScope - This tracks the current blocks that we have entered. 
+ std::vector<Block> BlockScope; + + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector<NaClBitCodeAbbrev*> Abbrevs; + }; + std::vector<BlockInfo> BlockInfoRecords; + +public: + // BackpatchWord - Backpatch a 32-bit word in the output with the specified + // value. + void BackpatchWord(unsigned ByteNo, unsigned NewWord) { + Out[ByteNo++] = (unsigned char)(NewWord >> 0); + Out[ByteNo++] = (unsigned char)(NewWord >> 8); + Out[ByteNo++] = (unsigned char)(NewWord >> 16); + Out[ByteNo ] = (unsigned char)(NewWord >> 24); + } + +private: + void WriteByte(unsigned char Value) { + Out.push_back(Value); + } + + void WriteWord(unsigned Value) { + unsigned char Bytes[4] = { + (unsigned char)(Value >> 0), + (unsigned char)(Value >> 8), + (unsigned char)(Value >> 16), + (unsigned char)(Value >> 24) }; + Out.append(&Bytes[0], &Bytes[4]); + } + + unsigned GetBufferOffset() const { + return Out.size(); + } + + unsigned GetWordIndex() const { + unsigned Offset = GetBufferOffset(); + assert((Offset & 3) == 0 && "Not 32-bit aligned"); + return Offset / 4; + } + +public: + explicit NaClBitstreamWriter(SmallVectorImpl<char> &O) + : Out(O), CurBit(0), CurValue(0), CurCodeSize() {} + + ~NaClBitstreamWriter() { + assert(CurBit == 0 && "Unflused data remaining"); + assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); + + // Free the BlockInfoRecords. + while (!BlockInfoRecords.empty()) { + BlockInfo &Info = BlockInfoRecords.back(); + // Free blockinfo abbrev info. + for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); + i != e; ++i) + Info.Abbrevs[i]->dropRef(); + BlockInfoRecords.pop_back(); + } + } + + /// \brief Retrieve the current position in the stream, in bits. + uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } + + //===--------------------------------------------------------------------===// + // Basic Primitives for emitting bits to the stream. + //===--------------------------------------------------------------------===// + + void Emit(uint32_t Val, unsigned NumBits) { + assert(NumBits && NumBits <= 32 && "Invalid value size!"); + assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); + CurValue |= Val << CurBit; + if (CurBit + NumBits < 32) { + CurBit += NumBits; + return; + } + + // Add the current word. + WriteWord(CurValue); + + if (CurBit) + CurValue = Val >> (32-CurBit); + else + CurValue = 0; + CurBit = (CurBit+NumBits) & 31; + } + + void Emit64(uint64_t Val, unsigned NumBits) { + if (NumBits <= 32) + Emit((uint32_t)Val, NumBits); + else { + Emit((uint32_t)Val, 32); + Emit((uint32_t)(Val >> 32), NumBits-32); + } + } + + void FlushToWord() { + if (CurBit) { + WriteWord(CurValue); + CurBit = 0; + CurValue = 0; + } + } + + void EmitVBR(uint32_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); + assert(NumBits > 1 && "Too few bits to emit!"); + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. 
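  // Worked example (illustrative, not part of this patch): with NumBits = 6
  // each emitted chunk carries 5 value bits plus a continuation bit (0x20).
  // Encoding Val = 1000 (0b1111101000):
  //   chunk 0: (1000 & 31) | 32 = 40   -> low five bits, continuation set
  //   chunk 1:  1000 >> 5       = 31   -> below Threshold, emitted last
  // ReadVBR() reassembles the value as 8 | (31 << 5) = 1000.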
+ while (Val >= Threshold) { + Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit(Val, NumBits); + } + + void EmitVBR64(uint64_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); + assert(NumBits > 1 && "Too few bits to emit!"); + if ((uint32_t)Val == Val) + return EmitVBR((uint32_t)Val, NumBits); + + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | + (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit((uint32_t)Val, NumBits); + } + + /// EmitCode - Emit the specified code. + void EmitCode(unsigned Val) { + if (CurCodeSize.IsFixed) + Emit(Val, CurCodeSize.NumBits); + else + EmitVBR(Val, CurCodeSize.NumBits); + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + BlockInfo *getBlockInfo(unsigned BlockID) { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size()); + i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return 0; + } + +private: + // Enter block using CodeLen bits to read the size of the code + // selector associated with the block. + void EnterSubblock(unsigned BlockID, + const NaClBitcodeSelectorAbbrev& CodeLen, + BlockInfo *Info) { + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] + EmitCode(naclbitc::ENTER_SUBBLOCK); + EmitVBR(BlockID, naclbitc::BlockIDWidth); + assert(CodeLen.IsFixed && "Block codelens must be fixed"); + EmitVBR(CodeLen.NumBits, naclbitc::CodeLenWidth); + FlushToWord(); + + unsigned BlockSizeWordIndex = GetWordIndex(); + NaClBitcodeSelectorAbbrev OldCodeSize(CurCodeSize); + + // Emit a placeholder, which will be replaced when the block is popped. + Emit(0, naclbitc::BlockSizeWidth); + + CurCodeSize = CodeLen; + + // Push the outer block's abbrev set onto the stack, start out with an + // empty abbrev set. + BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // If there is a blockinfo for this BlockID, add all the predefined abbrevs + // to the abbrev list. + if (Info) { + for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size()); + i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + Info->Abbrevs[i]->addRef(); + } + } + } + +public: + /// \brief Enter block using CodeLen bits to read the size of the code + /// selector associated with the block. + void EnterSubblock(unsigned BlockID, + const NaClBitcodeSelectorAbbrev& CodeLen) { + EnterSubblock(BlockID, CodeLen, getBlockInfo(BlockID)); + } + + /// \brief Enter block, using a code length based on the number of + /// (global) BlockInfo entries defined for the block. Note: This + /// should be used only if the block doesn't define any local abbreviations. + void EnterSubblock(unsigned BlockID) { + BlockInfo *Info = getBlockInfo(BlockID); + size_t NumAbbrevs = Info ? 
Info->Abbrevs.size() : 0; + NaClBitcodeSelectorAbbrev DefaultCodeLen( + naclbitc::DEFAULT_MAX_ABBREV+NumAbbrevs); + EnterSubblock(BlockID, DefaultCodeLen, Info); + } + + /// \brief Enter block with the given number of abbreviations. + void EnterSubblock(unsigned BlockID, unsigned NumAbbrev) { + NaClBitcodeSelectorAbbrev CodeLenAbbrev(NumAbbrev); + EnterSubblock(BlockID, CodeLenAbbrev); + } + + void ExitBlock() { + assert(!BlockScope.empty() && "Block scope imbalance!"); + + // Delete all abbrevs. + for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); + i != e; ++i) + CurAbbrevs[i]->dropRef(); + + const Block &B = BlockScope.back(); + + // Block tail: + // [END_BLOCK, <align4bytes>] + EmitCode(naclbitc::END_BLOCK); + FlushToWord(); + + // Compute the size of the block, in words, not counting the size field. + unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; + unsigned ByteNo = B.StartSizeWord*4; + + // Update the block size field in the header of this sub-block. + BackpatchWord(ByteNo, SizeInWords); + + // Restore the inner block's code size and abbrev table. + CurCodeSize = B.PrevCodeSize; + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Emission + //===--------------------------------------------------------------------===// + +private: + /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev + /// record. This is a no-op, since the abbrev specifies the literal to use. + template<typename uintty> + void EmitAbbreviatedLiteral(const NaClBitCodeAbbrevOp &Op, uintty V) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, don't emit + // anything. + assert(V == Op.getLiteralValue() && + "Invalid abbrev for record!"); + } + + /// EmitAbbreviatedField - Emit a single scalar field value with the specified + /// encoding. + template<typename uintty> + void EmitAbbreviatedField(const NaClBitCodeAbbrevOp &Op, uintty V) { + assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); + + // Encode the value as we are commanded. + switch (Op.getEncoding()) { + default: llvm_unreachable("Unknown encoding!"); + case NaClBitCodeAbbrevOp::Fixed: + if (Op.getEncodingData()) + Emit((unsigned)V, (unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::VBR: + if (Op.getEncodingData()) + EmitVBR64(V, (unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::Char6: + Emit(NaClBitCodeAbbrevOp::EncodeChar6((char)V), 6); + break; + } + } + + /// EmitRecordWithAbbrevImpl - This is the core implementation of the record + /// emission code. If BlobData is non-null, then it specifies an array of + /// data that should be emitted as part of the Blob or Array operand that is + /// known to exist at the end of the record. 
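As a usage sketch of the writer as a whole (illustrative only, not part of this patch): a client enters a block, emits records, and exits. The block ID and record code below come from NaClLLVMBitCodes.h added later in this diff, and the leading 'PEXE' bytes follow the magic-number convention checked by isNaClBitcode() in NaClReaderWriter.h.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"
#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
using namespace llvm;

void emitTinyModule(SmallVectorImpl<char> &Buffer) {
  NaClBitstreamWriter Stream(Buffer);
  // Magic number, one byte at a time.
  Stream.Emit('P', 8); Stream.Emit('E', 8);
  Stream.Emit('X', 8); Stream.Emit('E', 8);
  // [ENTER_SUBBLOCK, MODULE_BLOCK_ID, codelen, <align4bytes>, placeholder]
  Stream.EnterSubblock(naclbitc::MODULE_BLOCK_ID);
  SmallVector<uint64_t, 1> Vals;
  Vals.push_back(1);                                       // version operand
  Stream.EmitRecord(naclbitc::MODULE_CODE_VERSION, Vals);  // unabbreviated
  Stream.ExitBlock();  // backpatches the block-length placeholder
}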
+ template<typename uintty> + void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + StringRef Blob) { + const char *BlobData = Blob.data(); + unsigned BlobLen = (unsigned) Blob.size(); + unsigned AbbrevNo = Abbrev-naclbitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + NaClBitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; + + EmitCode(Abbrev); + + unsigned RecordIdx = 0; + for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); + i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); + ++RecordIdx; + } else if (Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { + // Array case. + assert(i+2 == e && "array op not second to last?"); + const NaClBitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // If this record has blob data, emit it, otherwise we must have record + // entries to encode this way. + if (BlobData) { + assert(RecordIdx == Vals.size() && + "Blob data and record entries specified for array!"); + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(static_cast<uint32_t>(BlobLen), 6); + + // Emit each field. + for (unsigned i = 0; i != BlobLen; ++i) + EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]); + + // Know that blob data is consumed for assertion below. + BlobData = 0; + } else { + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); + + // Emit each field. + for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) + EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); + } + } else if (Op.getEncoding() == NaClBitCodeAbbrevOp::Blob) { + // If this record has blob data, emit it, otherwise we must have record + // entries to encode this way. + + // Emit a vbr6 to indicate the number of elements present. + if (BlobData) { + EmitVBR(static_cast<uint32_t>(BlobLen), 6); + assert(RecordIdx == Vals.size() && + "Blob data and record entries specified for blob operand!"); + } else { + EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6); + } + + // Flush to a 32-bit alignment boundary. + FlushToWord(); + + // Emit each field as a literal byte. + if (BlobData) { + for (unsigned i = 0; i != BlobLen; ++i) + WriteByte((unsigned char)BlobData[i]); + + // Know that blob data is consumed for assertion below. + BlobData = 0; + } else { + for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) { + assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob"); + WriteByte((unsigned char)Vals[RecordIdx]); + } + } + + // Align end to 32-bits. + while (GetBufferOffset() & 3) + WriteByte(0); + } else { // Single scalar field. + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedField(Op, Vals[RecordIdx]); + ++RecordIdx; + } + } + assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); + assert(BlobData == 0 && + "Blob data specified for record that doesn't use it!"); + } + +public: + + /// EmitRecord - Emit the specified record to the stream, using an abbrev if + /// we have one to compress the output. + template<typename uintty> + void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals, + unsigned Abbrev = 0) { + if (!Abbrev) { + // If we don't have an abbrev to use, emit this in its fully unabbreviated + // form. 
+ EmitCode(naclbitc::UNABBREV_RECORD); + EmitVBR(Code, 6); + EmitVBR(static_cast<uint32_t>(Vals.size()), 6); + for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i) + EmitVBR64(Vals[i], 6); + return; + } + + // Insert the code into Vals to treat it uniformly. + Vals.insert(Vals.begin(), Code); + + EmitRecordWithAbbrev(Abbrev, Vals); + } + + /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. + /// Unlike EmitRecord, the code for the record should be included in Vals as + /// the first entry. + template<typename uintty> + void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) { + EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); + } + + /// EmitRecordWithBlob - Emit the specified record to the stream, using an + /// abbrev that includes a blob at the end. The blob data to emit is + /// specified by the pointer and length specified at the end. In contrast to + /// EmitRecord, this routine expects that the first entry in Vals is the code + /// of the record. + template<typename uintty> + void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + StringRef Blob) { + EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); + } + template<typename uintty> + void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + const char *BlobData, unsigned BlobLen) { + return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); + } + + /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records + /// that end with an array. + template<typename uintty> + void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + StringRef Array) { + EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); + } + template<typename uintty> + void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals, + const char *ArrayData, unsigned ArrayLen) { + return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, + ArrayLen)); + } + + //===--------------------------------------------------------------------===// + // Abbrev Emission + //===--------------------------------------------------------------------===// + +private: + // Emit the abbreviation as a DEFINE_ABBREV record. + void EncodeAbbrev(NaClBitCodeAbbrev *Abbv) { + EmitCode(naclbitc::DEFINE_ABBREV); + EmitVBR(Abbv->getNumOperandInfos(), 5); + for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos()); + i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + Emit(Op.isLiteral(), 1); + if (Op.isLiteral()) { + EmitVBR64(Op.getLiteralValue(), 8); + } else { + Emit(Op.getEncoding(), 3); + if (Op.hasEncodingData()) + EmitVBR64(Op.getEncodingData(), 5); + } + } + } +public: + + /// EmitAbbrev - This emits an abbreviation to the stream. Note that this + /// method takes ownership of the specified abbrev. + unsigned EmitAbbrev(NaClBitCodeAbbrev *Abbv) { + // Emit the abbreviation as a record. + EncodeAbbrev(Abbv); + CurAbbrevs.push_back(Abbv); + return static_cast<unsigned>(CurAbbrevs.size())-1 + + naclbitc::FIRST_APPLICATION_ABBREV; + } + + //===--------------------------------------------------------------------===// + // BlockInfo Block Emission + //===--------------------------------------------------------------------===// + + /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. 
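Abbreviations themselves are defined by the client and then referenced by index. A minimal sketch for a value-symbol-table entry follows; it is illustrative only and assumes NaClBitCodeAbbrev keeps the Add()/NaClBitCodeAbbrevOp builder interface of the upstream BitCodes.h it was forked from. In real code the abbreviation would be defined once per block (or in the BLOCKINFO block), not on every call.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"
#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
using namespace llvm;

static void emitValueName(NaClBitstreamWriter &Stream,
                          uint64_t ValueID, StringRef Name) {
  NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev();
  Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_ENTRY));     // literal code
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8));  // value id
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));   // name chars...
  Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6));   // ...[a-zA-Z0-9._] only
  unsigned AbbrevID = Stream.EmitAbbrev(Abbv);  // stream takes ownership

  SmallVector<uint64_t, 2> Vals;
  Vals.push_back(naclbitc::VST_CODE_ENTRY);  // code comes first for abbrevs
  Vals.push_back(ValueID);
  Stream.EmitRecordWithArray(AbbrevID, Vals, Name);  // Name fills the array
}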
+ void EnterBlockInfoBlock() { + EnterSubblock(naclbitc::BLOCKINFO_BLOCK_ID); + BlockInfoCurBID = ~0U; + } +private: + /// SwitchToBlockID - If we aren't already talking about the specified block + /// ID, emit a BLOCKINFO_CODE_SETBID record. + void SwitchToBlockID(unsigned BlockID) { + if (BlockInfoCurBID == BlockID) return; + SmallVector<unsigned, 2> V; + V.push_back(BlockID); + EmitRecord(naclbitc::BLOCKINFO_CODE_SETBID, V); + BlockInfoCurBID = BlockID; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (BlockInfo *BI = getBlockInfo(BlockID)) + return *BI; + + // Otherwise, add a new record. + BlockInfoRecords.push_back(BlockInfo()); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } + +public: + + /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified + /// BlockID. + unsigned EmitBlockInfoAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbv) { + SwitchToBlockID(BlockID); + EncodeAbbrev(Abbv); + + // Add the abbrev to the specified block record. + BlockInfo &Info = getOrCreateBlockInfo(BlockID); + Info.Abbrevs.push_back(Abbv); + + return Info.Abbrevs.size()-1+naclbitc::FIRST_APPLICATION_ABBREV; + } +}; + + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h b/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h new file mode 100644 index 0000000000..a7c56b4aaf --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h @@ -0,0 +1,380 @@ +//===- NaClLLVMBitCodes.h ---------------------------------------*- C++ -*-===// +// Enum values for the NaCl bitcode wire format +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines Bitcode enum values for NaCl bitcode wire format. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLLLVMBITCODES_H +#define LLVM_BITCODE_NACL_NACLLLVMBITCODES_H + +#include "llvm/Bitcode/NaCl/NaClBitCodes.h" + +namespace llvm { +namespace naclbitc { + // The only top-level block type defined is for a module. + enum NaClBlockIDs { + // Blocks + MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, + + // Module sub-block id's. + PARAMATTR_BLOCK_ID, // Not used in PNaCl. + PARAMATTR_GROUP_BLOCK_ID, // Not used in PNaCl. + + CONSTANTS_BLOCK_ID, + FUNCTION_BLOCK_ID, + + UNUSED_ID1, + + VALUE_SYMTAB_BLOCK_ID, + METADATA_BLOCK_ID, // Not used in PNaCl. + METADATA_ATTACHMENT_ID, // Not used in PNaCl. + + TYPE_BLOCK_ID_NEW, + + USELIST_BLOCK_ID, + GLOBALVAR_BLOCK_ID + }; + + + /// MODULE blocks have a number of optional fields and subblocks. + enum NaClModuleCodes { + MODULE_CODE_VERSION = 1, // VERSION: [version#] + MODULE_CODE_TRIPLE = 2, // Not used in PNaCl + MODULE_CODE_DATALAYOUT = 3, // Not used in PNaCl + MODULE_CODE_ASM = 4, // ASM: [strchr x N] + MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] + + // FIXME: Remove DEPLIB in 4.0. + MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] + + MODULE_CODE_GLOBALVAR = 7, // Not used in PNaCl. 
+ + // FUNCTION: [type, callingconv, isproto, linkage] + MODULE_CODE_FUNCTION = 8, + + // ALIAS: [alias type, aliasee val#, linkage, visibility] + MODULE_CODE_ALIAS = 9, + + // MODULE_CODE_PURGEVALS: [numvals] + MODULE_CODE_PURGEVALS = 10, + + MODULE_CODE_GCNAME = 11 // GCNAME: [strchr x N] + }; + + /// PARAMATTR blocks have code for defining a parameter attribute set. + enum NaClAttributeCodes { + // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 + PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, + // paramidx1, attr1...] + PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0, + // paramidx1, attrgrp1, ...] + PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...] + }; + + /// TYPE blocks have codes for each type primitive they use. + enum NaClTypeCodes { + TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] + + // Type Codes + TYPE_CODE_VOID = 2, // VOID + TYPE_CODE_FLOAT = 3, // FLOAT + TYPE_CODE_DOUBLE = 4, // DOUBLE + TYPE_CODE_LABEL = 5, // LABEL + TYPE_CODE_OPAQUE = 6, // OPAQUE + TYPE_CODE_INTEGER = 7, // INTEGER: [width] + TYPE_CODE_POINTER = 8, // POINTER: [pointee type] + + TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, + // paramty x N] + + TYPE_CODE_HALF = 10, // HALF + + TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] + TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] + + // These are not with the other floating point types because they're + // a late addition, and putting them in the right place breaks + // binary compatibility. + TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE + TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa) + TYPE_CODE_PPC_FP128= 15, // PPC LONG DOUBLE (2 doubles) + + TYPE_CODE_METADATA = 16, // Not used in PNaCl. + + TYPE_CODE_X86_MMX = 17, // X86 MMX + + TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N] + TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N] + TYPE_CODE_STRUCT_NAMED = 20,// STRUCT_NAMED: [ispacked, eltty x N] + + TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N] + }; + + // The type symbol table only has one code (TST_ENTRY_CODE). + enum NaClTypeSymtabCodes { + TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N] + }; + + // The value symbol table only has one code (VST_ENTRY_CODE). + enum NaClValueSymtabCodes { + VST_CODE_ENTRY = 1, // VST_ENTRY: [valid, namechar x N] + VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N] + }; + + // Not used in PNaCl. + enum NaClMetadataCodes { + METADATA_STRING = 1, // MDSTRING: [values] + // 2 is unused. + // 3 is unused. + METADATA_NAME = 4, // STRING: [values] + // 5 is unused. + METADATA_KIND = 6, // [n x [id, name]] + // 7 is unused. + METADATA_NODE = 8, // NODE: [n x (type num, value num)] + METADATA_FN_NODE = 9, // FN_NODE: [n x (type num, value num)] + METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] + METADATA_ATTACHMENT = 11 // [m x [value, [n x [id, mdnode]]] + }; + + // The constants block (CONSTANTS_BLOCK_ID) describes emission for each + // constant and maintains an implicit current type value. 
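  // Illustrative example (not part of this patch): emitting the i32 constants
  // 7 and -1 produces SETTYPE(<typeid of i32>) followed by INTEGER(14) and
  // INTEGER(3); the integer operands use the sign-rotated encoding
  // ((V << 1) | sign) inherited from upstream LLVM bitcode, and the SETTYPE
  // stays in effect until the next SETTYPE record.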
+ enum NaClConstantsCodes { + CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] + CST_CODE_NULL = 2, // NULL + CST_CODE_UNDEF = 3, // UNDEF + CST_CODE_INTEGER = 4, // INTEGER: [intval] + CST_CODE_WIDE_INTEGER = 5, // WIDE_INTEGER: [n x intval] + CST_CODE_FLOAT = 6, // FLOAT: [fpval] + CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] + CST_CODE_STRING = 8, // STRING: [values] + CST_CODE_CSTRING = 9, // CSTRING: [values] + CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] + CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] + CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] + CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] + CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] + CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] + CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] + CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] + CST_CODE_INLINEASM_OLD = 18, // INLINEASM: [sideeffect|alignstack, + // asmstr,conststr] + CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] + CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP: [n x operands] + CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] + CST_CODE_DATA = 22, // DATA: [n x elements] + CST_CODE_INLINEASM = 23 // INLINEASM: [sideeffect|alignstack| + // asmdialect,asmstr,conststr] + }; + + /// GlobalVarOpcodes - These are values used in the bitcode files to + /// encode records defining global variables. + /// + /// The structure of global variables can be summarized as follows: + /// + /// The global variable block begins with a GLOBALVAR_COUNT, defining + /// the number of global variables in the bitcode file. After that, + /// each global variable is defined. + /// + /// Global variables are defined by a GLOBALVAR_VAR record, followed + /// by 1 or more records defining its initial value. Simple + /// variables have a single initializer. Structured variables are + /// defined by an initial GLOBALVAR_COMPOUND record defining the + /// number of fields in the structure, followed by an initializer + /// for each of its fields. In this context, a field is either data, + /// or a relocation. A data field is defined by a + /// GLOBALVAR_ZEROFILL or GLOBALVAR_DATA record. A relocation field + /// is defined by a GLOBALVAR_RELOC record. + enum NaClGlobalVarOpcodes { + GLOBALVAR_VAR = 0, // VAR: [align, isconst] + GLOBALVAR_COMPOUND = 1, // COMPOUND: [size] + GLOBALVAR_ZEROFILL = 2, // ZEROFILL: [size] + GLOBALVAR_DATA = 3, // DATA: [b0, b1, ...] + GLOBALVAR_RELOC = 4, // RELOC: [val, [addend]] + GLOBALVAR_COUNT = 5 // COUNT: [n] + }; + + /// CastOpcodes - These are values used in the bitcode files to encode which + /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum NaClCastOpcodes { + CAST_TRUNC = 0, + CAST_ZEXT = 1, + CAST_SEXT = 2, + CAST_FPTOUI = 3, + CAST_FPTOSI = 4, + CAST_UITOFP = 5, + CAST_SITOFP = 6, + CAST_FPTRUNC = 7, + CAST_FPEXT = 8, + CAST_PTRTOINT = 9, + CAST_INTTOPTR = 10, + CAST_BITCAST = 11 + }; + + /// BinaryOpcodes - These are values used in the bitcode files to encode which + /// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. 
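  // Illustrative example for the GLOBALVAR_* records documented above (not
  // part of this patch): a constant global whose initializer is four zero
  // bytes, then the bytes {1,2,3,4}, then a relocation against value #5
  // would be emitted roughly as
  //   VAR(alignment, isconst=1)
  //   COMPOUND(3)        // three initializer fields follow
  //   ZEROFILL(4)
  //   DATA(1, 2, 3, 4)
  //   RELOC(5)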
+ enum NaClBinaryOpcodes { + BINOP_ADD = 0, + BINOP_SUB = 1, + BINOP_MUL = 2, + BINOP_UDIV = 3, + BINOP_SDIV = 4, // overloaded for FP + BINOP_UREM = 5, + BINOP_SREM = 6, // overloaded for FP + BINOP_SHL = 7, + BINOP_LSHR = 8, + BINOP_ASHR = 9, + BINOP_AND = 10, + BINOP_OR = 11, + BINOP_XOR = 12 + }; + + /// These are values used in the bitcode files to encode AtomicRMW operations. + /// The values of these enums have no fixed relation to the LLVM IR enum + /// values. Changing these will break compatibility with old files. + enum NaClRMWOperations { + RMW_XCHG = 0, + RMW_ADD = 1, + RMW_SUB = 2, + RMW_AND = 3, + RMW_NAND = 4, + RMW_OR = 5, + RMW_XOR = 6, + RMW_MAX = 7, + RMW_MIN = 8, + RMW_UMAX = 9, + RMW_UMIN = 10 + }; + + /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing + /// OverflowingBinaryOperator's SubclassOptionalData contents. + enum NaClOverflowingBinaryOperatorOptionalFlags { + OBO_NO_UNSIGNED_WRAP = 0, + OBO_NO_SIGNED_WRAP = 1 + }; + + /// PossiblyExactOperatorOptionalFlags - Flags for serializing + /// PossiblyExactOperator's SubclassOptionalData contents. + enum NaClPossiblyExactOperatorOptionalFlags { + PEO_EXACT = 0 + }; + + /// \brief Flags for serializing floating point binary operators's + /// SubclassOptionalData contents. + enum NaClFloatingPointBinaryOperatorOptionalFlags { + FPO_UNSAFE_ALGEBRA = 0, + FPO_NO_NANS = 1, + FPO_NO_INFS = 2, + FPO_NO_SIGNED_ZEROS = 3, + FPO_ALLOW_RECIPROCAL = 4 + }; + + /// Encoded AtomicOrdering values. + enum NaClAtomicOrderingCodes { + ORDERING_NOTATOMIC = 0, + ORDERING_UNORDERED = 1, + ORDERING_MONOTONIC = 2, + ORDERING_ACQUIRE = 3, + ORDERING_RELEASE = 4, + ORDERING_ACQREL = 5, + ORDERING_SEQCST = 6 + }; + + /// Encoded function calling conventions. + enum NaClCallingConventions { + C_CallingConv = 0 + }; + + /// Encoded SynchronizationScope values. + enum NaClAtomicSynchScopeCodes { + SYNCHSCOPE_SINGLETHREAD = 0, + SYNCHSCOPE_CROSSTHREAD = 1 + }; + + // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It + // can contain a constant block (CONSTANTS_BLOCK_ID). + enum NaClFunctionCodes { + FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] + + FUNC_CODE_INST_BINOP = 2, // BINOP: [opval, opval, opcode + // [, flags]] + FUNC_CODE_INST_CAST = 3, // CAST: [opval, destty, castopc] + FUNC_CODE_INST_GEP = 4, // GEP: [n x operands] + FUNC_CODE_INST_SELECT = 5, // SELECT: [opval, opval, opval] + FUNC_CODE_INST_EXTRACTELT = 6, // EXTRACTELT: [opval, opval] + FUNC_CODE_INST_INSERTELT = 7, // INSERTELT: [opval, opval, opval] + FUNC_CODE_INST_SHUFFLEVEC = 8, // SHUFFLEVEC: [opval, opval, opval] + FUNC_CODE_INST_CMP = 9, // CMP: [opval, opval, pred] + + FUNC_CODE_INST_RET = 10, // RET: [opval<optional>] + FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] + FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] + FUNC_CODE_INST_INVOKE = 13, // No longer allowed. + // 14 is unused. + FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE + + FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] + // 17 is unused. + // 18 is unused. + FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [op, align] + FUNC_CODE_INST_LOAD = 20, // LOAD: [op, align, vol] + // 21 is unused. + // 22 is unused. + FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty] + // This store code encodes the pointer type, rather than the value type + // this is so information only available in the pointer type (e.g. address + // spaces) is retained. + FUNC_CODE_INST_STORE = 24, // STORE: [ptr, val, align, vol] + // 25 is unused. 
+ FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [opval, n x indices] + FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [opval, opval, n x indices] + // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to + // support legacy vicmp/vfcmp instructions. + FUNC_CODE_INST_CMP2 = 28, // CMP2: [opval, opval, pred] + // new select on i1 or [N x i1] + FUNC_CODE_INST_VSELECT = 29, // VSELECT: [opval, opval, pred] + FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands] + FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...] + // 32 is unused. + FUNC_CODE_DEBUG_LOC_AGAIN = 33, // Not used in PNaCl. + + FUNC_CODE_INST_CALL = 34, // CALL: [cc, fnid, args...] + + FUNC_CODE_DEBUG_LOC = 35, // Not used in PNaCl. + FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] + FUNC_CODE_INST_CMPXCHG = 37, // CMPXCHG: [ptr, cmp, new, align, vol, + // ordering, synchscope] + FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptr,val, operation, + // align, vol, + // ordering, synchscope] + FUNC_CODE_INST_RESUME = 39, // RESUME: [opval] + FUNC_CODE_INST_LANDINGPAD = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...] + FUNC_CODE_INST_LOADATOMIC = 41, // LOAD: [op, align, vol, + // ordering, synchscope] + FUNC_CODE_INST_STOREATOMIC = 42, // STORE: [ptr, val, align, vol + // ordering, synchscope] + FUNC_CODE_INST_FORWARDTYPEREF = 43 // TYPE: [opval, ty] + }; + + enum NaClUseListCodes { + USELIST_CODE_ENTRY = 1 // USELIST_CODE_ENTRY: TBD. + }; +} // End naclbitc namespace +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h new file mode 100644 index 0000000000..53feb8ab86 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h @@ -0,0 +1,75 @@ +//===-- llvm/Bitcode/NaCl/NaClReaderWriter.h - ------------------*- C++ -*-===// +// NaCl Bitcode reader/writer. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write NaCl bitcode wire format +// files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLREADERWRITER_H +#define LLVM_BITCODE_NACL_NACLREADERWRITER_H + +#include <string> + +namespace llvm { + class MemoryBuffer; + class DataStreamer; + class LLVMContext; + class Module; + class raw_ostream; + + /// getNaClLazyBitcodeModule - Read the header of the specified bitcode buffer + /// and prepare for lazy deserialization of function bodies. If successful, + /// this takes ownership of 'buffer' and returns a non-null pointer. On + /// error, this returns null, *does not* take ownership of Buffer, and fills + /// in *ErrMsg with an error description if ErrMsg is non-null. + Module *getNaClLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); + + /// getNaClStreamedBitcodeModule - Read the header of the specified stream + /// and prepare for lazy deserialization and streaming of function bodies. + /// On error, this returns null, and fills in *ErrMsg with an error + /// description if ErrMsg is non-null. 
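As a usage sketch (illustrative only, not part of this patch, and assuming the MemoryBuffer/OwningPtr APIs of this tree), a tool reads a .pexe roughly as follows, checking the magic bytes with isNaClBitcode() declared later in this header before handing the buffer to NaClParseBitcodeFile():

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
using namespace llvm;

Module *loadPexe(const std::string &Path, LLVMContext &Ctx, std::string &Err) {
  OwningPtr<MemoryBuffer> Buf;
  if (error_code EC = MemoryBuffer::getFile(Path, Buf)) {
    Err = EC.message();
    return 0;
  }
  const unsigned char *Start =
      reinterpret_cast<const unsigned char *>(Buf->getBufferStart());
  if (!isNaClBitcode(Start, Start + Buf->getBufferSize())) {
    Err = "not a PNaCl bitcode file";
    return 0;
  }
  // NaClParseBitcodeFile never takes ownership of the buffer.
  return NaClParseBitcodeFile(Buf.get(), Ctx, &Err);
}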
+ Module *getNaClStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); + + /// NaClParseBitcodeFile - Read the specified bitcode file, + /// returning the module. If an error occurs, this returns null and + /// fills in *ErrMsg if it is non-null. This method *never* takes + /// ownership of Buffer. + Module *NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); + + /// NaClWriteBitcodeToFile - Write the specified module to the + /// specified raw output stream, using PNaCl wire format. For + /// streams where it matters, the given stream should be in "binary" + /// mode. + void NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out); + + /// isNaClBitcode - Return true if the given bytes are the magic bytes for + /// PNaCl bitcode wire format. + /// + inline bool isNaClBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + return BufPtr+4 <= BufEnd && + BufPtr[0] == 'P' && + BufPtr[1] == 'E' && + BufPtr[2] == 'X' && + BufPtr[3] == 'E'; + } + +} // end llvm namespace +#endif diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index c2fd6ce367..aa3637ce28 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -95,6 +95,12 @@ namespace llvm { /// default, this is equal to CurrentFnSym. MCSymbol *CurrentFnSymForSize; + /// @LOCALMOD-BEGIN + /// Is the bitcode module a plain object? This is false + /// for shared (pso) and executable (pexe) files. + bool IsPlainObject; + /// @LOCALMOD-END + private: // GCMetadataPrinters - The garbage collection metadata printer table. void *GCMetadataPrinters; // Really a DenseMap. @@ -244,6 +250,18 @@ namespace llvm { // Targets can, or in the case of EmitInstruction, must implement these to // customize output. + // @LOCALMOD-START + /// UseReadOnlyJumpTables - true if JumpTableInfo must be in rodata. + virtual bool UseReadOnlyJumpTables() const { return false; } + /// GetTargetBasicBlockAlign - the target alignment for basic blocks. + virtual unsigned GetTargetBasicBlockAlign() const { return 0; } + /// GetTargetLabelAlign - Get optional alignment for TargetOpcode + /// labels E.g., EH_LABEL. + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const { + return 0; + } + // @LOCALMOD-END + /// EmitStartOfAsmFile - This virtual method can be overridden by targets /// that want to emit something at the start of their file. virtual void EmitStartOfAsmFile(Module &) {} @@ -258,7 +276,12 @@ namespace llvm { /// EmitFunctionBodyEnd - Targets can override this to emit stuff after /// the last basic block in the function. - virtual void EmitFunctionBodyEnd() {} + virtual void EmitFunctionBodyEnd() { + // @LOCALMOD-START + unsigned NextFunctionAlignment = GetTargetBasicBlockAlign(); + if (NextFunctionAlignment) EmitAlignment(NextFunctionAlignment); + // @LOCALMOD-END + } /// EmitInstruction - Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index fa9d60f0d4..0b401f8f8e 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -213,6 +213,7 @@ private: // InRegsParamsProceed - shows how many instances of ByValRegs was proceed // during argument analysis. 
unsigned InRegsParamsProceed; + bool HasByValInRegPosition; // @LOCALMOD -- ARM only: see comment below. protected: ParmContext CallOrPrologue; @@ -394,6 +395,19 @@ public: ByValRegs.clear(); } + // @LOCALMOD-BEGIN + // We disabled the splitting of byval between registers and memory. + // This separate flag indicates that a byval existed. We cannot reuse + // isFirstByValRegValid() because that is already used by the broken + // mechanism of splitting between stack and regs. We should check + // again if this mechanism is still broken later, or try to fix that + // mechanism. + // NOTE: this is only for ARM, so should be refactored. + bool hasByValInRegPosition() const { return HasByValInRegPosition; } + void setHasByValInRegPosition() { HasByValInRegPosition = true; } + void clearHasByValInRegPosition() { HasByValInRegPosition = false; } + // @LOCALMOD-END + ParmContext getCallOrPrologue() const { return CallOrPrologue; } private: diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 0fd211b4a8..eda8a571a1 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -638,6 +638,19 @@ namespace ISD { /// is the chain and the second operand is the alloca pointer. LIFETIME_START, LIFETIME_END, + // @LOCALMOD-BEGIN + // NACL_* - Native Client instrinsics. + // NACL_READ_TP is a fast built-in version of NaCl's tls_get() IRT + // interface. + NACL_READ_TP, + // These correspond to functions in: + // native_client/src/untrusted/nacl/tls_params.h + NACL_TP_TLS_OFFSET, + NACL_TP_TDB_OFFSET, + // Expands to the target architecture enumeration value. + NACL_TARGET_ARCH, + // @LOCALMOD-END + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h index 9a73214186..bc12b93d75 100644 --- a/include/llvm/CodeGen/JITCodeEmitter.h +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -289,7 +289,7 @@ public: /// getCurrentPCOffset - Return the offset from the start of the emitted /// buffer that we are currently writing to. - uintptr_t getCurrentPCOffset() const { + virtual uintptr_t getCurrentPCOffset() const { // @LOCALMOD return CurBufferPtr-BufferBegin; } @@ -334,6 +334,13 @@ public: /// getLabelLocations - Return the label locations map of the label IDs to /// their address. 
virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { return 0; } + + // @LOCALMOD-START + virtual void beginBundleLock() {}; + virtual void endBundleLock() {}; + virtual void alignToBundleBeginning() {}; + virtual void alignToBundleEnd() {}; + // @LOCALMOD-END }; } // End llvm namespace diff --git a/include/llvm/CodeGen/LexicalScopes.h b/include/llvm/CodeGen/LexicalScopes.h index ff65db4ee4..5eeb5b8b08 100644 --- a/include/llvm/CodeGen/LexicalScopes.h +++ b/include/llvm/CodeGen/LexicalScopes.h @@ -159,6 +159,14 @@ public: LexicalScope(LexicalScope *P, const MDNode *D, const MDNode *I, bool A) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(A), LastInsn(0), FirstInsn(0), DFSIn(0), DFSOut(0) { + // @LOCALMOD-BEGIN -- Hack for bug + // http://code.google.com/p/nativeclient/issues/detail?id=2786 +#ifndef NDEBUG + Desc.make_weak(); + InlinedAtLocation.make_weak(); +#endif + // @LOCALMOD-END + if (Parent) Parent->addChild(this); } diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h index 916c0f233e..faacad7019 100644 --- a/include/llvm/CodeGen/LinkAllCodegenComponents.h +++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h @@ -34,7 +34,10 @@ namespace { (void) llvm::createFastRegisterAllocator(); (void) llvm::createBasicRegisterAllocator(); (void) llvm::createGreedyRegisterAllocator(); +#if !defined(__native_client__) + // Not needed by sandboxed translator. (void) llvm::createDefaultPBQPRegisterAllocator(); +#endif llvm::linkOcamlGC(); llvm::linkErlangGC(); diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 8ed215d75b..827a9f81e8 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -57,6 +57,17 @@ public: virtual void addSelectionDAGCSEId(FoldingSetNodeID &ID) = 0; + // @LOCALMOD-START + /// getJumpTableIndex - Check if this is a reference to a jump table. + /// If so, return a pointer to the jump table index value that is stored + /// in the constant pool, else return 0. + /// The default behavior is to indicate that the value is not a jump table + /// index. This is used by BranchFolder::runOnMachineFunction() and only in + /// conjunction with ARM targets + /// TODO: this should be cleaned up as it does tripple duty: tester, setter, getter + virtual unsigned *getJumpTableIndex() { return 0; } + // @LOCALMOD-END + /// print - Implement operator<< virtual void print(raw_ostream &O) const = 0; }; diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 92c8da991c..732958ed40 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -314,6 +314,21 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return BuildMI(BB, MII, DL, MCID); } +// @LOCALMOD-BEGIN +/// BuildMI - This version of the builder inserts the newly-built +/// instruction before the given position in the given MachineBasicBlock, +/// does NOT take a destination register, and does not add implicit operands. 
+/// +inline MachineInstrBuilder BuildMI_NoImp(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + DebugLoc DL, + const MCInstrDesc &MCID) { + MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL, true); + BB.insert(I, MI); + return MachineInstrBuilder(*BB.getParent(), MI); +} +// @LOCALMOD-END + /// BuildMI - This version of the builder inserts the newly-built /// instruction at the end of the given MachineBasicBlock, and does NOT take a /// destination register. diff --git a/include/llvm/CodeGen/MachineRelocation.h b/include/llvm/CodeGen/MachineRelocation.h index 244b466e17..8d71930882 100644 --- a/include/llvm/CodeGen/MachineRelocation.h +++ b/include/llvm/CodeGen/MachineRelocation.h @@ -197,6 +197,14 @@ public: return Offset; } + // @LOCALMOD-START + /// setMachineCodeOffset() - Adjust the offset in the code buffer (this is + /// used when the instruction is moved after emission for bundle alignment) + void setMachineCodeOffset(intptr_t offset) { + Offset = offset; + } + // @LOCALMOD-END + /// getRelocationType - Return the target-specific relocation ID for this /// relocation. unsigned getRelocationType() const { diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 260302a594..5bd70856e9 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -77,6 +77,26 @@ public: removeDeadConstantUsers(); // remove any dead constants using this. } + // @LOCALMOD-BEGIN + /// Set the symbol version for this definition. + void setVersionDef(StringRef Version, bool IsDefault); + + /// Set the symbol version and dynamic source file (soname) + /// for this exterally provided global. + void setNeeded(StringRef Version, StringRef DynFile); + + /// Get the name of this symbol without the version suffix. + StringRef getUnversionedName() const; + + /// Get the version of this symbol. + /// Returns an empty string if the symbol is unversioned. + StringRef getVersion() const; + + /// Returns true if this is the default version of the symbol. + /// This may only be called if the symbol is versioned. + bool isDefaultVersion() const; + // @LOCALMOD-END + unsigned getAlignment() const { return (1u << Alignment) >> 1; } diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index e252664e45..3e49620435 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -347,6 +347,9 @@ def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>; def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>; def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>; +// __builtin_unwind_init is an undocumented GCC intrinsic that causes all +// callee-saved registers to be saved and restored (regardless of whether they +// are used) in the calling function. It is used by libgcc_eh. def int_eh_unwind_init: Intrinsic<[]>, GCCBuiltin<"__builtin_unwind_init">; @@ -470,6 +473,33 @@ def int_convertus : Intrinsic<[llvm_anyint_ty], def int_convertuu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>; +// @LOCALMOD-BEGIN +//===----------------------- Native Client Intrinsics ---------------------===// +// NaCl-specific setjmp/longjmp intrinsics. +// See https://code.google.com/p/nativeclient/issues/detail?id=3429 +def int_nacl_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; +def int_nacl_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], + [IntrNoReturn]>; + +// Fast built-in version of NaCl's tls_get() IRT interface. 
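// Illustrative only (not part of this patch): from C++ this intrinsic is
// normally reached through the tablegen-generated enumerator, e.g.
//   Function *ReadTP = Intrinsic::getDeclaration(M, Intrinsic::nacl_read_tp);
//   Value *TP = Builder.CreateCall(ReadTP);  // raw thread pointer
// (Intrinsic::nacl_read_tp and the getDeclaration/IRBuilder calls follow the
// usual LLVM conventions and are assumed here, not shown in this diff.)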
+def int_nacl_read_tp : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; + +// The following intrinsics provide target-specific implementations of +// the interface in native_client/src/untrusted/nacl/tls_params.h. +// The intrinsic names are basically the functions there without the +// leading underscores. +def int_nacl_tp_tls_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_tp_tls_offset">; +def int_nacl_tp_tdb_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_tp_tdb_offset">; + +// The following intrinsic provides a target-specific constant value to +// indicate the target platform compiled to. The enum values are enumerated +// pnaclintrin.h. +def int_nacl_target_arch : Intrinsic<[llvm_i32_ty], []>, + GCCBuiltin<"__builtin_nacl_target_arch">; +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index cb500ffe7c..937ccb4971 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -22,6 +22,7 @@ #include "llvm/IR/Metadata.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/DataTypes.h" +#include <vector> // @LOCALMOD namespace llvm { @@ -121,7 +122,10 @@ public: typedef iplist<GlobalAlias> AliasListType; /// The type for the list of named metadata. typedef ilist<NamedMDNode> NamedMDListType; - + // @LOCALMOD-BEGIN + /// The type for the list of dependent libraries. + typedef std::vector<std::string> LibraryListType; + // @LOCALMOD-END /// The Global Variable iterator. typedef GlobalListType::iterator global_iterator; /// The Global Variable constant iterator. @@ -141,7 +145,10 @@ public: typedef NamedMDListType::iterator named_metadata_iterator; /// The named metadata constant interators. typedef NamedMDListType::const_iterator const_named_metadata_iterator; - + // @LOCALMOD-BEGIN + /// The Library list iterator. + typedef LibraryListType::const_iterator lib_iterator; + // @LOCALMOD-END /// An enumeration for describing the endianess of the target machine. enum Endianness { AnyEndianness, LittleEndian, BigEndian }; @@ -189,6 +196,22 @@ public: : Behavior(B), Key(K), Val(V) {} }; + /// @LOCALMOD-BEGIN + /// An enumeration for describing the module format + enum OutputFormat { + ObjectOutputFormat, + SharedOutputFormat, + ExecutableOutputFormat + }; + + /// A structure describing the symbols needed from an external file. + struct NeededRecord { + std::string DynFile; // Source file (soname) + std::vector<std::string> Symbols; // List of symbol names + // (with version suffix) + }; + /// @LOCALMOD-END + /// @} /// @name Member Variables /// @{ @@ -198,6 +221,8 @@ private: GlobalListType GlobalList; ///< The Global Variables in the module FunctionListType FunctionList; ///< The Functions in the module AliasListType AliasList; ///< The Aliases in the module + // @LOCALMOD + LibraryListType LibraryList; ///< The Libraries needed by the module NamedMDListType NamedMDList; ///< The named metadata in the module std::string GlobalScopeAsm; ///< Inline Asm at global scope. 
ValueSymbolTable *ValSymTab; ///< Symbol table for values @@ -205,6 +230,9 @@ private: std::string ModuleID; ///< Human readable identifier for the module std::string TargetTriple; ///< Platform target triple Module compiled on std::string DataLayout; ///< Target data description + // @LOCALMOD-BEGIN + mutable std::string ModuleSOName; ///< Module SOName (for shared format) + // @LOCALMOD-END void *NamedMDSymTab; ///< NamedMDNode names. friend class Constant; @@ -236,6 +264,24 @@ public: /// @returns a string containing the target triple. const std::string &getTargetTriple() const { return TargetTriple; } + // @LOCALMOD-BEGIN + + /// Get the module format + /// @returns the module format + OutputFormat getOutputFormat() const; + + /// Get the SOName of this module. + /// @returns a string containing the module soname + const std::string &getSOName() const; + + /// Record the needed information for a global value. + /// This creates a needed record for DynFile, if one does not already exist. + void addNeededRecord(StringRef DynFile, GlobalValue *GV); + + // Fill NeededOut with all needed records present in the module. + void getNeededRecords(std::vector<NeededRecord> *NeededOut) const; + // @LOCALMOD-END + /// Get the target endian information. /// @returns Endianess - an enumeration for the endianess of the target Endianness getEndianness() const; @@ -265,6 +311,18 @@ public: /// Set the target triple. void setTargetTriple(StringRef T) { TargetTriple = T; } + /// @LOCALMOD-BEGIN + + /// Set the module format + void setOutputFormat(OutputFormat F); + + /// For modules with output format "shared", set the output soname. + void setSOName(StringRef Name); + + /// Wrap a global symbol. + void wrapSymbol(StringRef SymName); + /// @LOCALMOD-END + /// Set the module-scope inline assembly blocks. void setModuleInlineAsm(StringRef Asm) { GlobalScopeAsm = Asm; @@ -527,8 +585,28 @@ public: const_iterator end () const { return FunctionList.end(); } size_t size() const { return FunctionList.size(); } bool empty() const { return FunctionList.empty(); } + // @LOCALMOD-BEGIN +/// @} +/// @name Dependent Library Iteration +/// @{ + + /// @brief Get a constant iterator to beginning of dependent library list. + inline lib_iterator lib_begin() const { return LibraryList.begin(); } + /// @brief Get a constant iterator to end of dependent library list. + inline lib_iterator lib_end() const { return LibraryList.end(); } + /// @brief Returns the number of items in the list of libraries. + inline size_t lib_size() const { return LibraryList.size(); } + void convertMetadataToLibraryList(); + void convertLibraryListToMetadata() const; + /// @brief Add a library to the list of dependent libraries + void addLibrary(StringRef Lib); + /// @brief Remove a library from the list of dependent libraries + void removeLibrary(StringRef Lib); + /// @brief Get all the libraries + inline const LibraryListType& getLibraries() const { return LibraryList; } /// @} + // @LOCALMOD-END /// @name Alias Iteration /// @{ @@ -569,6 +647,11 @@ public: /// Dump the module to stderr (for debugging). void dump() const; + /// @LOCALMOD-BEGIN + /// Print the PNaCl metadata for the module. + void dumpMeta(raw_ostream &OS) const; + /// @LOCALMOD-END + /// This function causes all the subinstructions to "let go" of all references /// that they are maintaining. This allows one to 'delete' a whole class at /// a time, even though there may be circular references... 
first all diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h index e2ae5f7164..4446e446cf 100644 --- a/include/llvm/IRReader/IRReader.h +++ b/include/llvm/IRReader/IRReader.h @@ -50,6 +50,32 @@ Module *ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, LLVMContext &Context); Module *ParseIRFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context); +// @LOCALMOD-BEGIN +// \brief Define the expected format of the file. +enum NaClFileFormat { + // LLVM IR source or bitcode file (as appropriate). + LLVMFormat, + // PNaCl bitcode file. + PNaClFormat +}; + +// \brief If the given MemoryBuffer holds a bitcode image, return a Module +// for it. Otherwise, attempt to parse it as LLVM Assembly and return +// a Module for it. This function *always* takes ownership of the given +// MemoryBuffer. +Module *NaClParseIR(MemoryBuffer *Buffer, + NaClFileFormat Format, + SMDiagnostic &Err, + LLVMContext &Context); + +/// \brief If the given file holds a Bitcode image, read the file. +/// Otherwise, attempt to parse it as LLVM assembly and return a +/// Module for it. +Module *NaClParseIRFile(const std::string &Filename, + NaClFileFormat Format, + SMDiagnostic &Err, + LLVMContext &Context); +// @LOCALMOD-END } #endif diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 5b2cd603c3..b5c478326e 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -274,6 +274,35 @@ void initializeLoopVectorizePass(PassRegistry&); void initializeSLPVectorizerPass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); +// @LOCALMOD-BEGIN +void initializeAddPNaClExternalDeclsPass(PassRegistry&); +void initializeCanonicalizeMemIntrinsicsPass(PassRegistry&); +void initializeExpandArithWithOverflowPass(PassRegistry&); +void initializeExpandByValPass(PassRegistry&); +void initializeExpandConstantExprPass(PassRegistry&); +void initializeExpandCtorsPass(PassRegistry&); +void initializeExpandGetElementPtrPass(PassRegistry&); +void initializeExpandSmallArgumentsPass(PassRegistry&); +void initializeExpandStructRegsPass(PassRegistry&); +void initializeExpandTlsConstantExprPass(PassRegistry&); +void initializeExpandTlsPass(PassRegistry&); +void initializeExpandVarArgsPass(PassRegistry&); +void initializeFlattenGlobalsPass(PassRegistry&); +void initializeGlobalCleanupPass(PassRegistry&); +void initializeInsertDivideCheckPass(PassRegistry&); +void initializeNaClCcRewritePass(PassRegistry&); +void initializePNaClABIVerifyModulePass(PassRegistry&); +void initializePNaClABIVerifyFunctionsPass(PassRegistry&); +void initializePromoteI1OpsPass(PassRegistry&); +void initializePromoteIntegersPass(PassRegistry&); +void initializeReplacePtrsWithIntsPass(PassRegistry&); +void initializeResolveAliasesPass(PassRegistry&); +void initializeResolvePNaClIntrinsicsPass(PassRegistry&); +void initializeRewriteLLVMIntrinsicsPass(PassRegistry&); +void initializeRewritePNaClLibraryCallsPass(PassRegistry&); +void initializeStripAttributesPass(PassRegistry&); +void initializeStripMetadataPass(PassRegistry&); +// @LOCALMOD-END } #endif diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h index 9a6b703408..685f2cb89f 100644 --- a/include/llvm/MC/MCAsmBackend.h +++ b/include/llvm/MC/MCAsmBackend.h @@ -25,6 +25,7 @@ class MCInst; class MCRelaxableFragment; class MCObjectWriter; class MCSection; +class MCStreamer; class MCValue; class raw_ostream; @@ -160,6 +161,16 @@ 
public: /// handleAssemblerFlag - Handle any target-specific assembler flags. /// By default, do nothing. virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {} + + // @LOCALMOD-BEGIN + /// CustomExpandInst - + /// If the MCInst instruction has a custom expansion, write it to the + /// MCStreamer 'Out'. This can be used to perform "last minute" rewrites of + /// MCInst instructions for emission. + virtual bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + return false; + } + // @LOCALMOD-END }; } // End llvm namespace diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 65dd1e8998..e2c8ba3c5e 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -76,6 +76,12 @@ public: return ELF::ELFOSABI_FREEBSD; case Triple::Linux: return ELF::ELFOSABI_LINUX; + // @LOCALMOD-BEGIN + // This shouldn't be needed anymore (sel_ldr doesn't check for it), + // but removing it may require some changes in binutils also. + case Triple::NaCl: + return ELF::ELFOSABI_NACL; + // @LOCALMOD-END default: return ELF::ELFOSABI_NONE; } diff --git a/include/llvm/MC/MCNaCl.h b/include/llvm/MC/MCNaCl.h new file mode 100644 index 0000000000..cf9b23ec1c --- /dev/null +++ b/include/llvm/MC/MCNaCl.h @@ -0,0 +1,18 @@ +//===- MCNaCl.h - NaCl-specific code for MC --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +namespace llvm { +class MCContext; +class MCStreamer; +class Triple; +/// Initialize target-specific bundle alignment and emit target-specific NaCl +/// ELF note sections. +void initializeNaClMCStreamer(MCStreamer &Streamer, MCContext &Ctx, + const Triple &TheTriple); +} diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index c46dfebd37..1d7ea70018 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -326,6 +326,7 @@ enum { ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 ELFOSABI_ARM = 97, // ARM + ELFOSABI_NACL = 123, // Native Client // @LOCALMOD ELFOSABI_STANDALONE = 255 // Standalone (embedded) application }; @@ -1395,6 +1396,13 @@ enum { PF_MASKPROC = 0xf0000000 // Bits for processor-specific semantics. }; +// @LOCALMOD-BEGIN +// Note segment descriptor types (for object files). +enum { + NT_VERSION = 1 // Note contains a version string. +}; +// @LOCALMOD-END + // Dynamic table entry for ELF32. struct Elf32_Dyn { diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index b06676d4d2..9658cded73 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -711,8 +711,14 @@ namespace llvm { /// @param Fn - A function to construct an MCInstPrinter for the target. static void RegisterMCInstPrinter(Target &T, Target::MCInstPrinterCtorTy Fn) { + // @LOCALMOD-BEGIN + // Prune out the .s printer for the sandboxed translator, + // by preventing an InstPrinter from being used at all. 
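// Assumed call site, for illustration only: this diff declares the
// initializeNaClMCStreamer hook in include/llvm/MC/MCNaCl.h (above) but does
// not show its callers. A backend that has just created its streamer could
// install the NaCl bundle alignment and ELF note roughly like this, where
// Streamer is an MCStreamer&, Ctx an MCContext&, and TheTriple the target
// triple (all names assumed, not taken from the patch).
if (TheTriple.getOS() == Triple::NaCl)
  initializeNaClMCStreamer(Streamer, Ctx, TheTriple);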
+ #if !defined(__native_client__) if (!T.MCInstPrinterCtorFn) T.MCInstPrinterCtorFn = Fn; + #endif + // @LOCALMOD-END } /// RegisterMCCodeEmitter - Register a MCCodeEmitter implementation for the diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h index b49341c3ff..00284ee5aa 100644 --- a/include/llvm/Support/ValueHandle.h +++ b/include/llvm/Support/ValueHandle.h @@ -105,6 +105,11 @@ protected: void setValPtrInt(unsigned K) { VP.setInt(K); } unsigned getValPtrInt() const { return VP.getInt(); } + // @LOCALMOD-BEGIN -- Hack for bug: + // http://code.google.com/p/nativeclient/issues/detail?id=2786 + void setKind(HandleBaseKind K) { PrevPair.setInt(K); } + // @LOCALMOD-END + static bool isValid(Value *V) { return V && V != DenseMapInfo<Value *>::getEmptyKey() && @@ -231,6 +236,21 @@ public: return getValPtr(); } + // @LOCALMOD-BEGIN -- Hack for bug: + // http://code.google.com/p/nativeclient/issues/detail?id=2786 + // This allows us to weaken the Asserting Value Handle in LexicalScopes.h, + // for Debug info only. FIXME: check if this is fixed by some upstream + // changes, e.g., r174084. Test by building the full ARM IRT w/ debug + // info, and dosbox with full debug info. +#ifndef NDEBUG + // Only enable for !defined(NDEBUG), since this only inherits from + // ValueHandleBase when !defined(NDEBUG). + void make_weak() { + setKind(Weak); + } +#endif + // @LOCALMOD-END + ValueTy *operator->() const { return getValPtr(); } ValueTy &operator*() const { return *getValPtr(); } }; diff --git a/include/llvm/Support/support_macros.h b/include/llvm/Support/support_macros.h new file mode 100644 index 0000000000..83d62c722c --- /dev/null +++ b/include/llvm/Support/support_macros.h @@ -0,0 +1,25 @@ +// Define support macros for defining classes, etc. + +#ifndef LLVM_SUPPORT_SUPPORT_MACROS_H__ +#define LLVM_SUPPORT_SUPPORT_MACROS_H__ + +// Define macro, to use within a class declaration, to disallow constructor +// copy. Defines copy constructor declaration under the assumption that it +// is never defined. +#define DISALLOW_CLASS_COPY(class_name) \ + class_name(class_name& arg) // Do not implement + +// Define macro, to use within a class declaration, to disallow assignment. +// Defines assignment operation declaration under the assumption that it +// is never defined. +#define DISALLOW_CLASS_ASSIGN(class_name) \ + void operator=(class_name& arg) // Do not implement + +// Define macro to add copy and assignment declarations to a class file, +// for which no bodies will be defined, effectively disallowing these from +// being defined in the class. 
+#define DISALLOW_CLASS_COPY_AND_ASSIGN(class_name) \ + DISALLOW_CLASS_COPY(class_name); \ + DISALLOW_CLASS_ASSIGN(class_name) + +#endif // LLVM_SUPPORT_SUPPORT_MACROS_H__ diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h index 43dace6ab8..325afb0428 100644 --- a/include/llvm/Support/system_error.h +++ b/include/llvm/Support/system_error.h @@ -597,7 +597,7 @@ enum _ { #else stream_timeout = ETIMEDOUT, #endif - text_file_busy = ETXTBSY, + text_file_busy = EINVAL, // @LOCALMOD timed_out = ETIMEDOUT, too_many_files_open_in_system = ENFILE, too_many_files_open = EMFILE, diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 7de8b384c3..2c5f895508 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -770,6 +770,40 @@ def LIFETIME_END : Instruction { let AsmString = "LIFETIME_END"; let neverHasSideEffects = 1; } +// @LOCALMOD-BEGIN +def BUNDLE_ALIGN_START : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +def BUNDLE_ALIGN_END : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +def BUNDLE_LOCK : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +def BUNDLE_UNLOCK : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; + let isNotDuplicable = 1; +} +// @LOCALMOD-END } //===----------------------------------------------------------------------===// diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index d5c9ebe0f2..7ce6584762 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -115,6 +115,18 @@ public: // mask (ex: x86 blends). }; + // @LOCALMOD-START + // This needs to be kept in sync with + // native_client/src/untrusted/nacl/pnaclintrin.h. + enum PnaclTargetArchitecture { + PnaclTargetArchitectureInvalid = 0, + PnaclTargetArchitectureX86_32, + PnaclTargetArchitectureX86_64, + PnaclTargetArchitectureARM_32, + PnaclTargetArchitectureARM_32_Thumb + }; + // @LOCALMOD-END + static ISD::NodeType getExtendForContent(BooleanContent Content) { switch (Content) { case UndefinedBooleanContent: diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h index 516e0706b8..2c9459974a 100644 --- a/include/llvm/Target/TargetOpcodes.h +++ b/include/llvm/Target/TargetOpcodes.h @@ -91,7 +91,14 @@ namespace TargetOpcode { /// Lifetime markers. LIFETIME_START = 15, - LIFETIME_END = 16 + LIFETIME_END = 16, + + // @LOCALMOD-BEGIN + BUNDLE_ALIGN_START = 14, + BUNDLE_ALIGN_END = 15, + BUNDLE_LOCK = 16, + BUNDLE_UNLOCK = 17 + // @LOCALMOD-END }; } // end namespace TargetOpcode } // end namespace llvm diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index c763a595dd..f70b575f39 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -30,6 +30,12 @@ namespace llvm { }; } + // @LOCALMOD-BEGIN + /// TLSUseCall - This flag enables the use of a function call to get the + /// thread pointer for TLS accesses, instead of using inline code. 
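// Illustrative usage only (not part of the patch) of the DISALLOW_* helpers
// that llvm/Support/support_macros.h introduces earlier in this diff;
// "ScratchBuffer" is a made-up class name.
#include "llvm/Support/support_macros.h"

class ScratchBuffer {
 public:
  ScratchBuffer() {}
 private:
  // Declares, but never defines, the copy constructor and assignment
  // operator, so accidental copies fail at compile or link time.
  DISALLOW_CLASS_COPY_AND_ASSIGN(ScratchBuffer);
};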
+ extern bool TLSUseCall; + // @LOCALMOD-END + namespace FPOpFusion { enum FPOpFusionMode { Fast, // Enable fusion of FP ops wherever it's profitable. diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h new file mode 100644 index 0000000000..43adb237dd --- /dev/null +++ b/include/llvm/Transforms/NaCl.h @@ -0,0 +1,76 @@ +//===-- NaCl.h - NaCl Transformations ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_NACL_H +#define LLVM_TRANSFORMS_NACL_H + +namespace llvm { + +class BasicBlockPass; +class Function; +class FunctionPass; +class FunctionType; +class Instruction; +class ModulePass; +class PassManager; +class Use; +class Value; + +ModulePass *createAddPNaClExternalDeclsPass(); +ModulePass *createCanonicalizeMemIntrinsicsPass(); +ModulePass *createExpandArithWithOverflowPass(); +ModulePass *createExpandByValPass(); +FunctionPass *createExpandConstantExprPass(); +ModulePass *createExpandCtorsPass(); +BasicBlockPass *createExpandGetElementPtrPass(); +ModulePass *createExpandSmallArgumentsPass(); +FunctionPass *createExpandStructRegsPass(); +ModulePass *createExpandTlsPass(); +ModulePass *createExpandTlsConstantExprPass(); +ModulePass *createExpandVarArgsPass(); +ModulePass *createFlattenGlobalsPass(); +ModulePass *createGlobalCleanupPass(); +BasicBlockPass *createPromoteI1OpsPass(); +FunctionPass *createPromoteIntegersPass(); +ModulePass *createReplacePtrsWithIntsPass(); +ModulePass *createResolveAliasesPass(); +FunctionPass *createResolvePNaClIntrinsicsPass(); +ModulePass *createRewriteLLVMIntrinsicsPass(); +ModulePass *createRewritePNaClLibraryCallsPass(); +ModulePass *createStripAttributesPass(); +ModulePass *createStripMetadataPass(); +FunctionPass *createInsertDivideCheckPass(); + +void PNaClABISimplifyAddPreOptPasses(PassManager &PM); +void PNaClABISimplifyAddPostOptPasses(PassManager &PM); + +Instruction *PhiSafeInsertPt(Use *U); +void PhiSafeReplaceUses(Use *U, Value *NewVal); + +// Copy debug information from Original to NewInst, and return NewInst. +Instruction *CopyDebug(Instruction *NewInst, Instruction *Original); + +template <class InstType> +static void CopyLoadOrStoreAttrs(InstType *Dest, InstType *Src) { + Dest->setVolatile(Src->isVolatile()); + Dest->setAlignment(Src->getAlignment()); + Dest->setOrdering(Src->getOrdering()); + Dest->setSynchScope(Src->getSynchScope()); +} + +// In order to change a function's type, the function must be +// recreated. RecreateFunction() recreates Func with type NewType. +// It copies or moves across everything except the argument values, +// which the caller must update because the argument types might be +// different. +Function *RecreateFunction(Function *Func, FunctionType *NewType); + +} + +#endif diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index e833aaa6d6..dc23dec382 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -346,7 +346,7 @@ extern char &InstructionSimplifierID; // "block_weights" metadata. 
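// Hedged sketch, not from the patch: one way a tool might drive a few of the
// factory functions that llvm/Transforms/NaCl.h (above) declares. The order
// PNaCl really uses comes from PNaClABISimplifyAddPreOptPasses /
// PNaClABISimplifyAddPostOptPasses, whose bodies are not shown in this diff.
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Transforms/NaCl.h"

static void runSomeNaClSimplifyPasses(llvm::Module &M) {
  llvm::PassManager PM;
  PM.add(llvm::createExpandVarArgsPass());        // lower va_arg before ABI checking
  PM.add(llvm::createExpandGetElementPtrPass());  // turn GEPs into explicit arithmetic
  PM.add(llvm::createReplacePtrsWithIntsPass());  // normalize pointer uses toward i32
  PM.run(M);
}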
FunctionPass *createLowerExpectIntrinsicPass(); - +FunctionPass *createNaClCcRewritePass(const TargetLowering *TLI = 0); } // End llvm namespace #endif diff --git a/include/llvm/Wrap/BCHeaderField.h b/include/llvm/Wrap/BCHeaderField.h new file mode 100644 index 0000000000..40a3714c9f --- /dev/null +++ b/include/llvm/Wrap/BCHeaderField.h @@ -0,0 +1,106 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +#ifndef LLVM_WRAP_BCHEADERFIELD_H +#define LLVM_WRAP_BCHEADERFIELD_H +#include <limits> +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +// Class representing a variable-size metadata field in the bitcode header. +// Also contains the list of known Tag IDs. +// Contains a pointer to the data but does not own the data, so it can be +// copied with the trivial copy constructor/assignment operator. + +// The serialized format has 2 fixed subfields (ID and length) and the +// variable-length data subfield +class BCHeaderField { + public: + typedef enum { + kInvalid = 0, + kBitcodeHash = 1, + kAndroidCompilerVersion = 0x4001, + kAndroidOptimizationLevel = 0x4002 + } Tag; + typedef uint16_t FixedSubfield; + + BCHeaderField(Tag ID, size_t len, uint8_t* data) : + ID_(ID), len_(len), data_(data) {} + size_t GetTotalSize() { + // Round up to 4 byte alignment + return (kTagLenSize + len_ + 3) & ~3; + } + + bool Write(uint8_t* buf, size_t buf_len) { + size_t fields_len = kTagLenSize + len_; + size_t pad_len = (4 - (fields_len & 3)) & 3; + // Ensure buffer is large enough and that length can be represented + // in 16 bits + if (buf_len < fields_len + pad_len || + len_ > std::numeric_limits<FixedSubfield>::max()) return false; + + WriteFixedSubfield(static_cast<FixedSubfield>(ID_), buf); + WriteFixedSubfield(static_cast<FixedSubfield>(len_), + buf + sizeof(FixedSubfield)); + memcpy(buf + kTagLenSize, data_, len_); + // Pad out to 4 byte alignment + if (pad_len) { + memset(buf + fields_len, 0, pad_len); + } + return true; + } + + bool Read(const uint8_t* buf, size_t buf_len) { + if (buf_len < kTagLenSize) return false; + FixedSubfield field; + ReadFixedSubfield(&field, buf); + ID_ = static_cast<Tag>(field); + ReadFixedSubfield(&field, buf + sizeof(FixedSubfield)); + len_ = static_cast<size_t>(field); + if (buf_len < kTagLenSize + len_) return false; + memcpy(data_, buf + kTagLenSize, len_); + return true; + } + + void Print() { + fprintf(stderr, "Field ID: %d, data length %d, total length %d\n", + ID_, static_cast<int>(len_), static_cast<int>(GetTotalSize())); + fprintf(stderr, "Data: "); + for (size_t i = 0; i < len_; i++) fprintf(stderr, "%02x", data_[i]); + fprintf(stderr, "\n"); + } + + // Get the data size from a serialized field to allow allocation + static size_t GetDataSizeFromSerialized(const uint8_t* buf) { + FixedSubfield len; + ReadFixedSubfield(&len, buf + sizeof(FixedSubfield)); + return len; + } + + Tag getID() const { + return ID_; + } + + size_t getLen() const { + return len_; + } + + private: + // Combined size of the fixed subfields + const static size_t kTagLenSize = 2 * sizeof(FixedSubfield); + static void WriteFixedSubfield(FixedSubfield value, uint8_t* buf) { + buf[0] = value & 0xFF; + buf[1] = (value >> 8) & 0xFF; + } + static void ReadFixedSubfield(FixedSubfield* value, const uint8_t* buf) { + *value = buf[0] | buf[1] << 8; + } + Tag ID_; + size_t len_; + uint8_t *data_; +}; + +#endif diff --git a/include/llvm/Wrap/bitcode_wrapperer.h 
b/include/llvm/Wrap/bitcode_wrapperer.h new file mode 100644 index 0000000000..89f2a4cbcc --- /dev/null +++ b/include/llvm/Wrap/bitcode_wrapperer.h @@ -0,0 +1,192 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Define utility class to wrap/unwrap bitcode files. Does wrapping/unwrapping +// in such a way that the wrappered bitcode file is still a bitcode file. + +#ifndef LLVM_WRAP_BITCODE_WRAPPERER_H__ +#define LLVM_WRAP_BITCODE_WRAPPERER_H__ + +#include <stdint.h> +#include <stddef.h> +#include <vector> + +#include "llvm/Support/support_macros.h" +#include "llvm/Wrap/BCHeaderField.h" +#include "llvm/Wrap/wrapper_input.h" +#include "llvm/Wrap/wrapper_output.h" + +// The bitcode wrapper header is the following 7 fixed 4-byte fields: +// 1) 0B17C0DE - The magic number expected by llvm for wrapped bitcodes +// 2) Version # 0 - The current version of wrapped bitcode files +// 3) (raw) bitcode offset +// 4) (raw) bitcode size +// 5) Android header version +// 6) Android target API +// 7) PNaCl Bitcode version +// plus 0 or more variable-length fields (consisting of ID, length, data) + +// Initial buffer size. It is expanded if needed to hold large variable-size +// fields. +static const size_t kBitcodeWrappererBufferSize = 1024; + +// Support class for outputting a wrapped bitcode file from a raw bitcode +// file (and optionally additional header fields), or for outputting a raw +// bitcode file from a wrapped one. +class BitcodeWrapperer { + public: + // Create a bitcode wrapperer using the following + // input and output files. + BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile); + + // Returns true if the input file begins with a bitcode + // wrapper magic number. As a side effect, _wrapper_ fields are set. + bool IsInputBitcodeWrapper(); + + // Returns true if the input file begins with a bitcode + // file magic number. + bool IsInputBitcodeFile(); + + // Add a variable-length field to the header. The caller is responsible + // for freeing the data pointed to by the BCHeaderField. + void AddHeaderField(BCHeaderField* field); + + // Generate a wrapped bitcode file from the input bitcode file + // and the current header data. Return true on success. + bool GenerateWrappedBitcodeFile(); + + // Unwrap the wrapped bitcode file, to the corresponding + // outfile. Return true on success. + bool GenerateRawBitcodeFile(); + + // Print current wrapper header fields to stderr for debugging. + void PrintWrapperHeader(); + + ~BitcodeWrapperer(); + + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(BitcodeWrapperer); + + // Refills the buffer with more bytes. Does this in a way + // such that it is maximally filled. + void FillBuffer(); + + // Returns the number of bytes in infile. + off_t GetInFileSize() { + if (infile_ != NULL) { + return infile_->Size(); + } else { + return 0; + } + } + + // Returns the offset of bitcode (i.e. the size of the wrapper header) + // if the output file were to be written now. + size_t BitcodeOffset(); + + // Returns true if we can read a word. If necessary, fills the buffer + // with enough characters so that there are at least a 32-bit value + // in the buffer. Returns false if there isn't a 32-bit value + // to read from the input file. + bool CanReadWord(); + + // Read a (32-bit) word from the input. Return true + // if able to read the word. + bool ReadWord(uint32_t& word); + + // Write a (32-bit) word to the output. 
Return true if successful + bool WriteWord(uint32_t word); + + // Write all variable-sized header fields to the output. Return true + // if successful. + bool WriteVariableFields(); + + // Parse the bitcode wrapper header in the infile, if any. Return true + // if successful. + bool ParseWrapperHeader(); + + // Returns the i-th character in front of the cursor in the buffer. + uint8_t BufferLookahead(int i) { return buffer_[cursor_ + i]; } + + // Returns how many unread bytes are in the buffer. + size_t GetBufferUnreadBytes() { return buffer_size_ - cursor_; } + + + // Backs up the read cursor to the beginning of the input buffer. + void ResetCursor() { + cursor_ = 0; + } + + // Generates the header sequence for the wrapped bitcode being + // generated. + bool WriteBitcodeWrapperHeader(); + + // Copies size bytes of infile to outfile, using the buffer. + bool BufferCopyInToOut(uint32_t size); + + // Discards the old infile and replaces it with the given file. + void ReplaceInFile(WrapperInput* new_infile); + + // Discards the old outfile and replaces it with the given file. + void ReplaceOutFile(WrapperOutput* new_outfile); + + // Moves to the given position in the input file. Returns false + // if unsuccessful. + bool Seek(uint32_t pos); + + // Clear the buffer of all contents. + void ClearBuffer(); + + // The input file being processed. Can be either + // a bitcode file, a wrappered bitcode file, or a secondary + // file to be wrapped. + WrapperInput* infile_; + + // The output file being generated. Can be either + // a bitcode file, a wrappered bitcode file, or a secondary + // unwrapped file. + WrapperOutput* outfile_; + + // A buffer of bytes read from the input file. + std::vector<uint8_t> buffer_; + + // The number of bytes that were read from the input file + // into the buffer. + size_t buffer_size_; + + // The index to the current read point within the buffer. + size_t cursor_; + + // True when eof of input is reached. + bool infile_at_eof_; + + // The 32-bit value defining the offset of the raw bitcode in the input file. + uint32_t infile_bc_offset_; + + // The 32-bit value defining the generated offset of the wrapped bitcode. + // This value changes as new fields are added with AddHeaderField + uint32_t wrapper_bc_offset_; + + // The 32-bit value defining the size of the raw wrapped bitcode. + uint32_t wrapper_bc_size_; + + // Android header version and target API + uint32_t android_header_version_; + uint32_t android_target_api_; + + // PNaCl bitcode version + uint32_t pnacl_bc_version_; + + // Vector of variable header fields + std::vector<BCHeaderField> header_fields_; + // If any bufferdata from header fields is owned, it is stored here and + // freed on destruction. + std::vector<uint8_t*> variable_field_data_; + + // True if there was an error condition (e.g. the file is not bitcode) + bool error_; +}; + +#endif // LLVM_WRAP_BITCODE_WRAPPERER_H__ diff --git a/include/llvm/Wrap/file_wrapper_input.h b/include/llvm/Wrap/file_wrapper_input.h new file mode 100644 index 0000000000..9f3de004c4 --- /dev/null +++ b/include/llvm/Wrap/file_wrapper_input.h @@ -0,0 +1,48 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Defines utility allowing files for bitcode input wrapping. 
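// Assumed driver code, for illustration only, exercising the BCHeaderField
// class added earlier in this diff; the field contents are invented.
#include "llvm/Wrap/BCHeaderField.h"
#include <assert.h>

void RoundtripExample() {
  uint8_t hash[4] = {0xde, 0xad, 0xbe, 0xef};
  BCHeaderField Field(BCHeaderField::kBitcodeHash, sizeof(hash), hash);

  uint8_t buf[64];
  bool ok = Field.Write(buf, sizeof(buf));   // ID, length, data, padded to 4 bytes
  assert(ok && Field.GetTotalSize() == 8);

  uint8_t back[4];
  BCHeaderField Parsed(BCHeaderField::kInvalid, 0, back);
  ok = Parsed.Read(buf, sizeof(buf));        // recovers the ID/length, copies the data out
  assert(ok && Parsed.getID() == BCHeaderField::kBitcodeHash && Parsed.getLen() == 4);
}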
+ +#ifndef FILE_WRAPPER_INPUT_H__ +#define FILE_WRAPPER_INPUT_H__ + +#include "llvm/Support/support_macros.h" +#include "llvm/Wrap/wrapper_input.h" + +#include <stdio.h> +#include <string> + +// Define a class to wrap named files. +class FileWrapperInput : public WrapperInput { + public: + FileWrapperInput(const std::string& name); + ~FileWrapperInput(); + // Tries to read the requested number of bytes into the buffer. Returns the + // actual number of bytes read. + virtual size_t Read(uint8_t* buffer, size_t wanted); + // Returns true if at end of file. Note: May return false + // until Read is called, and returns 0. + virtual bool AtEof(); + // Returns the size of the file (in bytes). + virtual off_t Size(); + // Moves to the given offset within the file. Returns + // false if unable to move to that position. + virtual bool Seek(uint32_t pos); + private: + // The name of the file. + std::string _name; + // True once eof has been encountered. + bool _at_eof; + // True if size has been computed. + bool _size_found; + // The size of the file. + off_t _size; + // The corresponding (opened) file. + FILE* _file; + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(FileWrapperInput); +}; + +#endif // FILE_WRAPPER_INPUT_H__ diff --git a/include/llvm/Wrap/file_wrapper_output.h b/include/llvm/Wrap/file_wrapper_output.h new file mode 100644 index 0000000000..714bd36a75 --- /dev/null +++ b/include/llvm/Wrap/file_wrapper_output.h @@ -0,0 +1,34 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Defines utility allowing files for bitcode output wrapping. + +#ifndef FILE_WRAPPER_OUTPUT_H__ +#define FILE_WRAPPER_OUTPUT_H__ + +#include "llvm/Support/support_macros.h" +#include "llvm/Wrap/wrapper_output.h" +#include <stdio.h> +#include <string> + +// Define a class to wrap named files. */ +class FileWrapperOutput : public WrapperOutput { + public: + FileWrapperOutput(const std::string& name); + ~FileWrapperOutput(); + // Writes a single byte, returning false if unable to write. + virtual bool Write(uint8_t byte); + // Writes the specified number of bytes in the buffer to + // output. Returns false if unable to write. + virtual bool Write(const uint8_t* buffer, size_t buffer_size); + private: + // The name of the file + std::string _name; + // The corresponding (opened) file. + FILE* _file; + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(FileWrapperOutput); +}; +#endif // FILE_WRAPPER_OUTPUT_H__ diff --git a/include/llvm/Wrap/wrapper_input.h b/include/llvm/Wrap/wrapper_input.h new file mode 100644 index 0000000000..cde918083a --- /dev/null +++ b/include/llvm/Wrap/wrapper_input.h @@ -0,0 +1,38 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Define a generic interface to a file/memory region that contains +// a bitcode file, a wrapped bitcode file, or a data file to wrap. + +#ifndef LLVM_WRAP_WRAPPER_INPUT_H__ +#define LLVM_WRAP_WRAPPER_INPUT_H__ + +#include <stdint.h> +#include <sys/types.h> + +#include "llvm/Support/support_macros.h" + +// The following is a generic interface to a file/memory region that contains +// a bitcode file, a wrapped bitcode file, or data file to wrap. +class WrapperInput { + public: + WrapperInput() {} + virtual ~WrapperInput() {} + // Tries to read the requested number of bytes into the buffer. 
Returns the + // actual number of bytes read. + virtual size_t Read(uint8_t* buffer, size_t wanted) = 0; + // Returns true if at end of input. Note: May return false until + // Read is called, and returns 0. + virtual bool AtEof() = 0; + // Returns the size of the input (in bytes). + virtual off_t Size() = 0; + // Moves to the given offset within the input region. Returns false + // if unable to move to that position. + virtual bool Seek(uint32_t pos) = 0; + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(WrapperInput); +}; + +#endif // LLVM_WRAP_WRAPPER_INPUT_H__ diff --git a/include/llvm/Wrap/wrapper_output.h b/include/llvm/Wrap/wrapper_output.h new file mode 100644 index 0000000000..7045705991 --- /dev/null +++ b/include/llvm/Wrap/wrapper_output.h @@ -0,0 +1,34 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +// Defines a generic interface to a file/memory region that +// contains a generated wrapped bitcode file, bitcode file, +// or data file. + +#ifndef LLVM_WRAP_WRAPPER_OUTPUT_H__ +#define LLVM_WRAP_WRAPPER_OUTPUT_H__ + +#include <stdint.h> +#include <stddef.h> + +#include "llvm/Support/support_macros.h" + +// The following is a generic interface to a file/memory region +// that contains a generated bitcode file, wrapped bitcode file, +// or a data file. +class WrapperOutput { + public: + WrapperOutput() {} + virtual ~WrapperOutput() {} + // Writes a single byte, returning false if unable to write. + virtual bool Write(uint8_t byte) = 0; + // Writes the specified number of bytes in the buffer to + // output. Returns false if unable to write. + virtual bool Write(const uint8_t* buffer, size_t buffer_size); + private: + DISALLOW_CLASS_COPY_AND_ASSIGN(WrapperOutput); +}; + +#endif // LLVM_WRAP_WRAPPER_OUTPUT_H__ diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 597c767a8e..b5207670c3 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -63,3 +63,4 @@ add_llvm_library(LLVMAnalysis add_dependencies(LLVMAnalysis intrinsics_gen) add_subdirectory(IPA) +add_subdirectory(NaCl) # LOCALMOD diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index a8a8079d1e..de734ec3f7 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPA +subdirectories = IPA NaCl [component_0] type = Library diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile index 4af6d350a6..426ed1699d 100644 --- a/lib/Analysis/Makefile +++ b/lib/Analysis/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. 
LIBRARYNAME = LLVMAnalysis -DIRS = IPA +DIRS = IPA NaCl BUILD_ARCHIVE = 1 include $(LEVEL)/Makefile.common diff --git a/lib/Analysis/NaCl/CMakeLists.txt b/lib/Analysis/NaCl/CMakeLists.txt new file mode 100644 index 0000000000..f62a4e5c02 --- /dev/null +++ b/lib/Analysis/NaCl/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMNaClAnalysis + PNaClABITypeChecker.cpp + PNaClABIVerifyFunctions.cpp + PNaClABIVerifyModule.cpp + ) + +add_dependencies(LLVMNaClAnalysis intrinsics_gen) diff --git a/lib/Analysis/NaCl/LLVMBuild.txt b/lib/Analysis/NaCl/LLVMBuild.txt new file mode 100644 index 0000000000..b5e7c8a5ea --- /dev/null +++ b/lib/Analysis/NaCl/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Analysis/NaCl/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClAnalysis +parent = Analysis +library_name = NaClAnalysis +required_libraries = Analysis Core Support diff --git a/lib/Analysis/NaCl/Makefile b/lib/Analysis/NaCl/Makefile new file mode 100644 index 0000000000..7d03b1e92e --- /dev/null +++ b/lib/Analysis/NaCl/Makefile @@ -0,0 +1,14 @@ +##===- lib/Analysis/NaCl/Makefile-------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMNaClAnalysis +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common
\ No newline at end of file diff --git a/lib/Analysis/NaCl/PNaClABITypeChecker.cpp b/lib/Analysis/NaCl/PNaClABITypeChecker.cpp new file mode 100644 index 0000000000..8749abcaa6 --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABITypeChecker.cpp @@ -0,0 +1,64 @@ +//===- PNaClABITypeChecker.cpp - Verify PNaCl ABI rules -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common type-checking code for module and function-level passes +// +// +//===----------------------------------------------------------------------===// + +#include "PNaClABITypeChecker.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Metadata.h" + +using namespace llvm; + +bool PNaClABITypeChecker::isValidParamType(const Type *Ty) { + if (!isValidScalarType(Ty)) + return false; + if (const IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) { + // PNaCl requires function arguments and return values to be 32 + // bits or larger. This avoids exposing architecture + // ABI-dependent differences about whether arguments or return + // values are zero-extended when calling a function with the wrong + // prototype. + if (IntTy->getBitWidth() < 32) + return false; + } + return true; +} + +bool PNaClABITypeChecker::isValidFunctionType(const FunctionType *FTy) { + if (FTy->isVarArg()) + return false; + if (!isValidParamType(FTy->getReturnType())) + return false; + for (unsigned I = 0, E = FTy->getNumParams(); I < E; ++I) { + if (!isValidParamType(FTy->getParamType(I))) + return false; + } + return true; +} + +bool PNaClABITypeChecker::isValidScalarType(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: { + unsigned Width = cast<const IntegerType>(Ty)->getBitWidth(); + return Width == 1 || Width == 8 || Width == 16 || + Width == 32 || Width == 64; + } + case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + return true; + default: + return false; + } +} diff --git a/lib/Analysis/NaCl/PNaClABITypeChecker.h b/lib/Analysis/NaCl/PNaClABITypeChecker.h new file mode 100644 index 0000000000..ac3cf850e5 --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABITypeChecker.h @@ -0,0 +1,47 @@ +//===- PNaClABITypeChecker.h - Verify PNaCl ABI rules ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common type-checking code for module and function-level passes +// +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_ANALYSIS_NACL_CHECKTYPES_H +#define LIB_ANALYSIS_NACL_CHECKTYPES_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +class FunctionType; + +class PNaClABITypeChecker { + // Returns true if Ty is a valid argument or return value type for PNaCl. + static bool isValidParamType(const Type *Ty); + + public: + // Returns true if Ty is a valid function type for PNaCl. + static bool isValidFunctionType(const FunctionType *FTy); + + // Returns true if Ty is a valid non-derived type for PNaCl. 
+ static bool isValidScalarType(const Type *Ty); + + // There's no built-in way to get the name of a type, so use a + // string ostream to print it. + static std::string getTypeName(const Type *T) { + std::string TypeName; + raw_string_ostream N(TypeName); + T->print(N); + return N.str(); + } +}; +} // namespace llvm + +#endif // LIB_ANALYSIS_NACL_CHECKTYPES_H diff --git a/lib/Analysis/NaCl/PNaClABIVerifyFunctions.cpp b/lib/Analysis/NaCl/PNaClABIVerifyFunctions.cpp new file mode 100644 index 0000000000..80d7da3f19 --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABIVerifyFunctions.cpp @@ -0,0 +1,475 @@ +//===- PNaClABIVerifyFunctions.cpp - Verify PNaCl ABI rules ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Verify function-level PNaCl ABI requirements. +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/NaCl.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +#include "PNaClABITypeChecker.h" +using namespace llvm; + +namespace { + +// Checks that examine anything in the function body should be in +// FunctionPasses to make them streaming-friendly +class PNaClABIVerifyFunctions : public FunctionPass { + public: + static char ID; + PNaClABIVerifyFunctions() : + FunctionPass(ID), + Reporter(new PNaClABIErrorReporter), + ReporterIsOwned(true) { + initializePNaClABIVerifyFunctionsPass(*PassRegistry::getPassRegistry()); + } + explicit PNaClABIVerifyFunctions(PNaClABIErrorReporter *Reporter_) : + FunctionPass(ID), + Reporter(Reporter_), + ReporterIsOwned(false) { + initializePNaClABIVerifyFunctionsPass(*PassRegistry::getPassRegistry()); + } + ~PNaClABIVerifyFunctions() { + if (ReporterIsOwned) + delete Reporter; + } + bool runOnFunction(Function &F); + virtual void print(raw_ostream &O, const Module *M) const; + private: + bool IsWhitelistedMetadata(unsigned MDKind); + const char *checkInstruction(const Instruction *Inst); + PNaClABIErrorReporter *Reporter; + bool ReporterIsOwned; +}; + +} // and anonymous namespace + +// There's no built-in way to get the name of an MDNode, so use a +// string ostream to print it. +static std::string getMDNodeString(unsigned Kind, + const SmallVectorImpl<StringRef> &MDNames) { + std::string MDName; + raw_string_ostream N(MDName); + if (Kind < MDNames.size()) { + N << "!" << MDNames[Kind]; + } else { + N << "!<unknown kind #" << Kind << ">"; + } + return N.str(); +} + +bool PNaClABIVerifyFunctions::IsWhitelistedMetadata(unsigned MDKind) { + return MDKind == LLVMContext::MD_dbg && PNaClABIAllowDebugMetadata; +} + +// A valid pointer type is either: +// * a pointer to a valid PNaCl scalar type (except i1), or +// * a function pointer (with valid argument and return types). +// +// i1 is disallowed so that all loads and stores are a whole number of +// bytes, and so that we do not need to define whether a store of i1 +// zero-extends. 
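// Illustrative only (assumed helper, not part of the verifier): concrete
// inputs for PNaClABITypeChecker::isValidScalarType(), which the pointer
// rule below builds on; assumes <assert.h>.
static void scalarTypeRuleExamples(LLVMContext &C) {
  assert( PNaClABITypeChecker::isValidScalarType(Type::getInt32Ty(C)));    // i32: one of i1/i8/i16/i32/i64
  assert( PNaClABITypeChecker::isValidScalarType(Type::getDoubleTy(C)));   // float and double are allowed
  assert(!PNaClABITypeChecker::isValidScalarType(Type::getIntNTy(C, 17))); // odd integer widths rejected
  assert(!PNaClABITypeChecker::isValidScalarType(Type::getX86_FP80Ty(C))); // long double rejected
}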
+static bool isValidPointerType(Type *Ty) { + if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) { + if (PtrTy->getAddressSpace() != 0) + return false; + Type *EltTy = PtrTy->getElementType(); + if (PNaClABITypeChecker::isValidScalarType(EltTy) && + !EltTy->isIntegerTy(1)) + return true; + if (FunctionType *FTy = dyn_cast<FunctionType>(EltTy)) + return PNaClABITypeChecker::isValidFunctionType(FTy); + } + return false; +} + +static bool isIntrinsicFunc(const Value *Val) { + if (const Function *F = dyn_cast<Function>(Val)) + return F->isIntrinsic(); + return false; +} + +// InherentPtrs may be referenced by casts -- PtrToIntInst and +// BitCastInst -- that produce NormalizedPtrs. +// +// InherentPtrs exclude intrinsic functions in order to prevent taking +// the address of an intrinsic function. InherentPtrs include +// intrinsic calls because some intrinsics return pointer types +// (e.g. nacl.read.tp returns i8*). +static bool isInherentPtr(const Value *Val) { + return isa<AllocaInst>(Val) || + (isa<GlobalValue>(Val) && !isIntrinsicFunc(Val)) || + isa<IntrinsicInst>(Val); +} + +// NormalizedPtrs may be used where pointer types are required -- for +// loads, stores, etc. Note that this excludes ConstantExprs, +// ConstantPointerNull and UndefValue. +static bool isNormalizedPtr(const Value *Val) { + if (!isValidPointerType(Val->getType())) + return false; + // The bitcast must also be a bitcast of an InherentPtr, but we + // check that when visiting the bitcast instruction. + return isa<IntToPtrInst>(Val) || isa<BitCastInst>(Val) || isInherentPtr(Val); +} + +static bool isValidScalarOperand(const Value *Val) { + // The types of Instructions and Arguments are checked elsewhere + // (when visiting the Instruction or the Function). BasicBlocks are + // included here because branch instructions have BasicBlock + // operands. + if (isa<Instruction>(Val) || isa<Argument>(Val) || isa<BasicBlock>(Val)) + return true; + + // Allow some Constants. Note that this excludes ConstantExprs. + return PNaClABITypeChecker::isValidScalarType(Val->getType()) && + (isa<ConstantInt>(Val) || + isa<ConstantFP>(Val) || + isa<UndefValue>(Val)); +} + +static bool isAllowedAlignment(unsigned Alignment, Type *Ty, bool IsAtomic) { + if (IsAtomic) { + // For atomic operations, the alignment must match the size of the type. + if (Ty->isIntegerTy()) { + unsigned Bits = Ty->getIntegerBitWidth(); + return Bits % 8 == 0 && Alignment == Bits / 8; + } + return (Ty->isDoubleTy() && Alignment == 8) || + (Ty->isFloatTy() && Alignment == 4); + } + // Non-atomic integer operations must always use "align 1", since we + // do not want the backend to generate code with non-portable + // undefined behaviour (such as misaligned access faults) if user + // code specifies "align 4" but uses a misaligned pointer. As a + // concession to performance, we allow larger alignment values for + // floating point types. + // + // To reduce the set of alignment values that need to be encoded in + // pexes, we disallow other alignment values. We require alignments + // to be explicit by disallowing Alignment == 0. + return Alignment == 1 || + (Ty->isDoubleTy() && Alignment == 8) || + (Ty->isFloatTy() && Alignment == 4); +} + +// Check the instruction's opcode and its operands. The operands may +// require opcode-specific checking. +// +// This returns an error string if the instruction is rejected, or +// NULL if the instruction is allowed. 
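// Illustrative only (assumed helper, not part of the verifier): concrete
// cases for the isAllowedAlignment() rule defined above; assumes <assert.h>.
static void alignmentRuleExamples(LLVMContext &C) {
  assert( isAllowedAlignment(1, Type::getInt32Ty(C),  /*IsAtomic=*/false)); // integers: "align 1" only
  assert(!isAllowedAlignment(4, Type::getInt32Ty(C),  /*IsAtomic=*/false)); // larger int alignments rejected
  assert( isAllowedAlignment(4, Type::getFloatTy(C),  /*IsAtomic=*/false)); // natural FP alignment allowed
  assert( isAllowedAlignment(4, Type::getInt32Ty(C),  /*IsAtomic=*/true));  // atomics must match the type size
  assert(!isAllowedAlignment(0, Type::getDoubleTy(C), /*IsAtomic=*/false)); // implicit (0) alignment rejected
}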
+const char *PNaClABIVerifyFunctions::checkInstruction(const Instruction *Inst) { + // If the instruction has a single pointer operand, PtrOperandIndex is + // set to its operand index. + unsigned PtrOperandIndex = -1; + + switch (Inst->getOpcode()) { + // Disallowed instructions. Default is to disallow. + // We expand GetElementPtr out into arithmetic. + case Instruction::GetElementPtr: + // VAArg is expanded out by ExpandVarArgs. + case Instruction::VAArg: + // Zero-cost C++ exception handling is not supported yet. + case Instruction::Invoke: + case Instruction::LandingPad: + case Instruction::Resume: + // indirectbr may interfere with streaming + case Instruction::IndirectBr: + // No vector instructions yet + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + // ExtractValue and InsertValue operate on struct values. + case Instruction::ExtractValue: + case Instruction::InsertValue: + return "bad instruction opcode"; + default: + return "unknown instruction opcode"; + + // Terminator instructions + case Instruction::Ret: + case Instruction::Br: + case Instruction::Unreachable: + // Binary operations + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // Bitwise binary operations + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Memory instructions + case Instruction::Fence: + // Conversion operations + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + // Other operations + case Instruction::FCmp: + case Instruction::PHI: + case Instruction::Select: + break; + + // The following operations are of dubious usefulness on 1-bit + // values. Use of the i1 type is disallowed here so that code + // generators do not need to support these corner cases. + case Instruction::ICmp: + // Binary operations + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + if (Inst->getOperand(0)->getType()->isIntegerTy(1)) + return "arithmetic on i1"; + break; + + // Memory accesses. + case Instruction::Load: { + const LoadInst *Load = cast<LoadInst>(Inst); + if (!isAllowedAlignment(Load->getAlignment(), + Load->getType(), + Load->isAtomic())) + return "bad alignment"; + PtrOperandIndex = 0; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad pointer"; + break; + } + case Instruction::Store: { + const StoreInst *Store = cast<StoreInst>(Inst); + if (!isAllowedAlignment(Store->getAlignment(), + Store->getValueOperand()->getType(), + Store->isAtomic())) + return "bad alignment"; + PtrOperandIndex = 1; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad pointer"; + break; + } + case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: + PtrOperandIndex = 0; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad pointer"; + break; + + // Casts. 
+ case Instruction::BitCast: + if (Inst->getType()->isPointerTy()) { + PtrOperandIndex = 0; + if (!isInherentPtr(Inst->getOperand(PtrOperandIndex))) + return "operand not InherentPtr"; + } + break; + case Instruction::IntToPtr: + if (!cast<IntToPtrInst>(Inst)->getSrcTy()->isIntegerTy(32)) + return "non-i32 inttoptr"; + break; + case Instruction::PtrToInt: + PtrOperandIndex = 0; + if (!isInherentPtr(Inst->getOperand(PtrOperandIndex))) + return "operand not InherentPtr"; + if (!Inst->getType()->isIntegerTy(32)) + return "non-i32 ptrtoint"; + break; + + case Instruction::Alloca: { + const AllocaInst *Alloca = cast<AllocaInst>(Inst); + if (!Alloca->getAllocatedType()->isIntegerTy(8)) + return "non-i8 alloca"; + if (!Alloca->getArraySize()->getType()->isIntegerTy(32)) + return "alloca array size is not i32"; + break; + } + + case Instruction::Call: { + const CallInst *Call = cast<CallInst>(Inst); + if (Call->isInlineAsm()) + return "inline assembly"; + if (!Call->getAttributes().isEmpty()) + return "bad call attributes"; + if (Call->getCallingConv() != CallingConv::C) + return "bad calling convention"; + + // Intrinsic calls can have multiple pointer arguments and + // metadata arguments, so handle them specially. + if (const IntrinsicInst *Call = dyn_cast<IntrinsicInst>(Inst)) { + for (unsigned ArgNum = 0, E = Call->getNumArgOperands(); + ArgNum < E; ++ArgNum) { + const Value *Arg = Call->getArgOperand(ArgNum); + if (!(isValidScalarOperand(Arg) || + isNormalizedPtr(Arg) || + isa<MDNode>(Arg))) + return "bad intrinsic operand"; + } + // Disallow alignments other than 1 on memcpy() etc., for the + // same reason that we disallow them on integer loads and + // stores. + if (const MemIntrinsic *MemOp = dyn_cast<MemIntrinsic>(Call)) { + // Avoid the getAlignment() method here because it aborts if + // the alignment argument is not a Constant. + Value *AlignArg = MemOp->getArgOperand(3); + if (!isa<ConstantInt>(AlignArg) || + cast<ConstantInt>(AlignArg)->getZExtValue() != 1) { + return "bad alignment"; + } + } + // Allow the instruction and skip the later checks. + return NULL; + } + + // The callee is the last operand. + PtrOperandIndex = Inst->getNumOperands() - 1; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad function callee operand"; + break; + } + + case Instruction::Switch: { + // SwitchInst represents switch cases using array and vector + // constants, which we normally reject, so we must check + // SwitchInst specially here. + const SwitchInst *Switch = cast<SwitchInst>(Inst); + if (!isValidScalarOperand(Switch->getCondition())) + return "bad switch condition"; + if (Switch->getCondition()->getType()->isIntegerTy(1)) + return "switch on i1"; + + // SwitchInst requires the cases to be ConstantInts, but it + // doesn't require their types to be the same as the condition + // value, so check all the cases too. + for (SwitchInst::ConstCaseIt Case = Switch->case_begin(), + E = Switch->case_end(); Case != E; ++Case) { + IntegersSubset CaseRanges = Case.getCaseValueEx(); + for (unsigned I = 0, E = CaseRanges.getNumItems(); I < E ; ++I) { + if (!isValidScalarOperand( + CaseRanges.getItem(I).getLow().toConstantInt()) || + !isValidScalarOperand( + CaseRanges.getItem(I).getHigh().toConstantInt())) { + return "bad switch case"; + } + } + } + + // Allow the instruction and skip the later checks. + return NULL; + } + } + + // Check the instruction's operands. We have already checked any + // pointer operands. Any remaining operands must be scalars. 
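// Assumed illustration, not in the patch: the only alloca form the Alloca
// case above accepts is an i8 array sized by an i32 -- the IRBuilder
// equivalent of "%buf = alloca i8, i32 16". Assumes an IRBuilder<> named
// Builder and an LLVMContext named C are in scope.
Value *Size = ConstantInt::get(Type::getInt32Ty(C), 16);
AllocaInst *Buf = Builder.CreateAlloca(Type::getInt8Ty(C), Size, "buf");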
+ for (unsigned OpNum = 0, E = Inst->getNumOperands(); OpNum < E; ++OpNum) { + if (OpNum != PtrOperandIndex && + !isValidScalarOperand(Inst->getOperand(OpNum))) + return "bad operand"; + } + + // Check arithmetic attributes. + if (const OverflowingBinaryOperator *Op = + dyn_cast<OverflowingBinaryOperator>(Inst)) { + if (Op->hasNoUnsignedWrap()) + return "has \"nuw\" attribute"; + if (Op->hasNoSignedWrap()) + return "has \"nsw\" attribute"; + } + if (const PossiblyExactOperator *Op = + dyn_cast<PossiblyExactOperator>(Inst)) { + if (Op->isExact()) + return "has \"exact\" attribute"; + } + + // Allow the instruction. + return NULL; +} + +bool PNaClABIVerifyFunctions::runOnFunction(Function &F) { + SmallVector<StringRef, 8> MDNames; + F.getContext().getMDKindNames(MDNames); + + for (Function::const_iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) { + for (BasicBlock::const_iterator BBI = FI->begin(), BBE = FI->end(); + BBI != BBE; ++BBI) { + const Instruction *Inst = BBI; + // Check the instruction opcode first. This simplifies testing, + // because some instruction opcodes must be rejected out of hand + // (regardless of the instruction's result type) and the tests + // check the reason for rejection. + const char *Error = checkInstruction(BBI); + // Check the instruction's result type. + if (!Error && !(PNaClABITypeChecker::isValidScalarType(Inst->getType()) || + isNormalizedPtr(Inst) || + isa<AllocaInst>(Inst))) { + Error = "bad result type"; + } + if (Error) { + Reporter->addError() << "Function " << F.getName() << + " disallowed: " << Error << ": " << *BBI << "\n"; + } + + // Check instruction attachment metadata. + SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst; + BBI->getAllMetadata(MDForInst); + + for (unsigned i = 0, e = MDForInst.size(); i != e; i++) { + if (!IsWhitelistedMetadata(MDForInst[i].first)) { + Reporter->addError() + << "Function " << F.getName() + << " has disallowed instruction metadata: " + << getMDNodeString(MDForInst[i].first, MDNames) << "\n"; + } + } + } + } + + Reporter->checkForFatalErrors(); + return false; +} + +// This method exists so that the passes can easily be run with opt -analyze. +// In this case the default constructor is used and we want to reset the error +// messages after each print. +void PNaClABIVerifyFunctions::print(llvm::raw_ostream &O, const Module *M) + const { + Reporter->printErrors(O); + Reporter->reset(); +} + +char PNaClABIVerifyFunctions::ID = 0; +INITIALIZE_PASS(PNaClABIVerifyFunctions, "verify-pnaclabi-functions", + "Verify functions for PNaCl", false, true) + +FunctionPass *llvm::createPNaClABIVerifyFunctionsPass( + PNaClABIErrorReporter *Reporter) { + return new PNaClABIVerifyFunctions(Reporter); +} diff --git a/lib/Analysis/NaCl/PNaClABIVerifyModule.cpp b/lib/Analysis/NaCl/PNaClABIVerifyModule.cpp new file mode 100644 index 0000000000..17852ebbef --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABIVerifyModule.cpp @@ -0,0 +1,516 @@ +//===- PNaClABIVerifyModule.cpp - Verify PNaCl ABI rules ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Verify module-level PNaCl ABI requirements (specifically those that do not +// require looking at the function bodies) +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/Pass.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/NaCl.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "PNaClABITypeChecker.h" +using namespace llvm; + +namespace llvm { +cl::opt<bool> +PNaClABIAllowDebugMetadata("pnaclabi-allow-debug-metadata", + cl::desc("Allow debug metadata during PNaCl ABI verification."), + cl::init(false)); + +} + +static cl::opt<bool> +PNaClABIAllowDevIntrinsics("pnaclabi-allow-dev-intrinsics", + cl::desc("Allow all LLVM intrinsics during PNaCl ABI verification."), + cl::init(true)); // TODO(jvoung): Make this false by default. + +namespace { +// This pass should not touch function bodies, to stay streaming-friendly +class PNaClABIVerifyModule : public ModulePass { + public: + static char ID; + PNaClABIVerifyModule() : + ModulePass(ID), + Reporter(new PNaClABIErrorReporter), + ReporterIsOwned(true) { + initializePNaClABIVerifyModulePass(*PassRegistry::getPassRegistry()); + } + explicit PNaClABIVerifyModule(PNaClABIErrorReporter *Reporter_, + bool StreamingMode) : + ModulePass(ID), + Reporter(Reporter_), + ReporterIsOwned(false), + StreamingMode(StreamingMode) { + initializePNaClABIVerifyModulePass(*PassRegistry::getPassRegistry()); + } + ~PNaClABIVerifyModule() { + if (ReporterIsOwned) + delete Reporter; + } + bool runOnModule(Module &M); + virtual void print(raw_ostream &O, const Module *M) const; + private: + void checkGlobalValueCommon(const GlobalValue *GV); + bool isWhitelistedMetadata(const NamedMDNode *MD); + + /// Returns whether \p GV is an allowed external symbol in stable bitcode. + bool isWhitelistedExternal(const GlobalValue *GV); + + void checkGlobalIsFlattened(const GlobalVariable *GV); + PNaClABIErrorReporter *Reporter; + bool ReporterIsOwned; + bool StreamingMode; +}; + +class AllowedIntrinsics { + LLVMContext *Context; + // Maps from an allowed intrinsic's name to its type. + StringMap<FunctionType *> Mapping; + + // Tys is an array of type parameters for the intrinsic. This + // defaults to an empty array. 
+ void addIntrinsic(Intrinsic::ID ID, + ArrayRef<Type *> Tys = ArrayRef<Type*>()) { + Mapping[Intrinsic::getName(ID, Tys)] = + Intrinsic::getType(*Context, ID, Tys); + } +public: + AllowedIntrinsics(LLVMContext *Context); + bool isAllowed(const Function *Func); +}; + +static const char *linkageName(GlobalValue::LinkageTypes LT) { + // This logic is taken from PrintLinkage in lib/VMCore/AsmWriter.cpp + switch (LT) { + case GlobalValue::ExternalLinkage: return "external"; + case GlobalValue::PrivateLinkage: return "private "; + case GlobalValue::LinkerPrivateLinkage: return "linker_private "; + case GlobalValue::LinkerPrivateWeakLinkage: return "linker_private_weak "; + case GlobalValue::InternalLinkage: return "internal "; + case GlobalValue::LinkOnceAnyLinkage: return "linkonce "; + case GlobalValue::LinkOnceODRLinkage: return "linkonce_odr "; + case GlobalValue::LinkOnceODRAutoHideLinkage: + return "linkonce_odr_auto_hide "; + case GlobalValue::WeakAnyLinkage: return "weak "; + case GlobalValue::WeakODRLinkage: return "weak_odr "; + case GlobalValue::CommonLinkage: return "common "; + case GlobalValue::AppendingLinkage: return "appending "; + case GlobalValue::DLLImportLinkage: return "dllimport "; + case GlobalValue::DLLExportLinkage: return "dllexport "; + case GlobalValue::ExternalWeakLinkage: return "extern_weak "; + case GlobalValue::AvailableExternallyLinkage: + return "available_externally "; + default: + return "unknown"; + } +} + +} // end anonymous namespace + +// Check linkage type and section attributes, which are the same for +// GlobalVariables and Functions. +void PNaClABIVerifyModule::checkGlobalValueCommon(const GlobalValue *GV) { + assert(!isa<GlobalAlias>(GV)); + const char *GVTypeName = isa<GlobalVariable>(GV) ? + "Variable " : "Function "; + switch (GV->getLinkage()) { + case GlobalValue::ExternalLinkage: + if (!isWhitelistedExternal(GV)) { + Reporter->addError() + << GV->getName() + << " is not a valid external symbol (disallowed)\n"; + } + break; + case GlobalValue::InternalLinkage: + break; + default: + Reporter->addError() << GVTypeName << GV->getName() + << " has disallowed linkage type: " + << linkageName(GV->getLinkage()) << "\n"; + } + if (GV->getVisibility() != GlobalValue::DefaultVisibility) { + std::string Text = "unknown"; + if (GV->getVisibility() == GlobalValue::HiddenVisibility) { + Text = "hidden"; + } else if (GV->getVisibility() == GlobalValue::ProtectedVisibility) { + Text = "protected"; + } + Reporter->addError() << GVTypeName << GV->getName() + << " has disallowed visibility: " << Text << "\n"; + } + if (GV->hasSection()) { + Reporter->addError() << GVTypeName << GV->getName() << + " has disallowed \"section\" attribute\n"; + } + if (GV->getType()->getAddressSpace() != 0) { + Reporter->addError() << GVTypeName << GV->getName() + << " has addrspace attribute (disallowed)\n"; + } + // The "unnamed_addr" attribute can be used to merge duplicate + // definitions, but that should be done by user-toolchain + // optimization passes, not by the PNaCl translator. 
+  if (GV->hasUnnamedAddr()) {
+    Reporter->addError() << GVTypeName << GV->getName()
+                         << " has disallowed \"unnamed_addr\" attribute\n";
+  }
+}
+
+AllowedIntrinsics::AllowedIntrinsics(LLVMContext *Context) : Context(Context) {
+  Type *I8Ptr = Type::getInt8PtrTy(*Context);
+  Type *I16 = Type::getInt16Ty(*Context);
+  Type *I32 = Type::getInt32Ty(*Context);
+  Type *I64 = Type::getInt64Ty(*Context);
+  Type *Float = Type::getFloatTy(*Context);
+  Type *Double = Type::getDoubleTy(*Context);
+
+  // We accept bswap for a limited set of types (i16, i32, i64). The
+  // various backends are able to generate instructions to implement
+  // the intrinsic. Also, i16 and i64 are easy to implement as long
+  // as there is a way to do i32.
+  addIntrinsic(Intrinsic::bswap, I16);
+  addIntrinsic(Intrinsic::bswap, I32);
+  addIntrinsic(Intrinsic::bswap, I64);
+
+  // We accept cttz, ctlz, and ctpop for a limited set of types (i32, i64).
+  addIntrinsic(Intrinsic::ctlz, I32);
+  addIntrinsic(Intrinsic::ctlz, I64);
+  addIntrinsic(Intrinsic::cttz, I32);
+  addIntrinsic(Intrinsic::cttz, I64);
+  addIntrinsic(Intrinsic::ctpop, I32);
+  addIntrinsic(Intrinsic::ctpop, I64);
+
+  addIntrinsic(Intrinsic::nacl_read_tp);
+  addIntrinsic(Intrinsic::nacl_longjmp);
+  addIntrinsic(Intrinsic::nacl_setjmp);
+
+  // For native sqrt instructions. Must guarantee that sqrt(x) = NaN when x < -0.0.
+  addIntrinsic(Intrinsic::sqrt, Float);
+  addIntrinsic(Intrinsic::sqrt, Double);
+
+  // Stack save and restore are used to support C99 VLAs.
+  addIntrinsic(Intrinsic::stacksave);
+  addIntrinsic(Intrinsic::stackrestore);
+
+  addIntrinsic(Intrinsic::trap);
+
+  // We only allow the variants of memcpy/memmove/memset with an i32
+  // "len" argument, not an i64 argument.
+  Type *MemcpyTypes[] = { I8Ptr, I8Ptr, I32 };
+  addIntrinsic(Intrinsic::memcpy, MemcpyTypes);
+  addIntrinsic(Intrinsic::memmove, MemcpyTypes);
+  Type *MemsetTypes[] = { I8Ptr, I32 };
+  addIntrinsic(Intrinsic::memset, MemsetTypes);
+}
+
+bool AllowedIntrinsics::isAllowed(const Function *Func) {
+  // Keep 3 categories of intrinsics for now.
+  // (1) Allowed always, provided the exact name and type match.
+  // (2) Never allowed
+  // (3) "Dev" intrinsics, which may or may not be allowed.
+  // "Dev" intrinsics are controlled by the PNaClABIAllowDevIntrinsics flag.
+  // Please keep these sorted or grouped in a sensible way, within
+  // each category.
+
+  // (1) Allowed always, provided the exact name and type match.
+  if (Mapping.count(Func->getName()) == 1)
+    return Func->getFunctionType() == Mapping[Func->getName()];
+
+  switch (Func->getIntrinsicID()) {
+    // Disallow by default.
+    default: return false;
+
+    // (2) Known to be never allowed.
+    case Intrinsic::not_intrinsic:
+    // Trampolines depend on a target-specific-sized/aligned buffer.
+    case Intrinsic::adjust_trampoline:
+    case Intrinsic::init_trampoline:
+    // CXX exception handling is not stable.
+    case Intrinsic::eh_dwarf_cfa:
+    case Intrinsic::eh_return_i32:
+    case Intrinsic::eh_return_i64:
+    case Intrinsic::eh_sjlj_callsite:
+    case Intrinsic::eh_sjlj_functioncontext:
+    case Intrinsic::eh_sjlj_longjmp:
+    case Intrinsic::eh_sjlj_lsda:
+    case Intrinsic::eh_sjlj_setjmp:
+    case Intrinsic::eh_typeid_for:
+    case Intrinsic::eh_unwind_init:
+    // We do not want to expose addresses to the user.
+    case Intrinsic::frameaddress:
+    case Intrinsic::returnaddress:
+    // Not supporting stack protectors.
+    case Intrinsic::stackprotector:
+    // Var-args handling is done w/out intrinsics.
+ case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::vastart: + // Disallow the *_with_overflow intrinsics because they return + // struct types. All of them can be introduced by passing -ftrapv + // to Clang, which we do not support for now. umul_with_overflow + // and uadd_with_overflow are introduced by Clang for C++'s new[], + // but ExpandArithWithOverflow expands out this use. + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // Disallow lifetime.start/end because the semantics of what + // arguments they accept are not very well defined, and because it + // would be better to do merging of stack slots in the user + // toolchain than in the PNaCl translator. + // See https://code.google.com/p/nativeclient/issues/detail?id=3443 + case Intrinsic::lifetime_end: + case Intrinsic::lifetime_start: + case Intrinsic::invariant_end: + case Intrinsic::invariant_start: + // Some transcendental functions not needed yet. + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::sin: + // We run -lower-expect to convert Intrinsic::expect into branch weights + // and consume in the middle-end. The backend just ignores llvm.expect. + case Intrinsic::expect: + // For FLT_ROUNDS macro from float.h. It works for ARM and X86 + // (but not MIPS). Also, wait until we add a set_flt_rounds intrinsic + // before we bless this. + case Intrinsic::flt_rounds: + return false; + + // (3) Dev intrinsics. + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return PNaClABIAllowDevIntrinsics || PNaClABIAllowDebugMetadata; + case Intrinsic::nacl_target_arch: // Used by translator self-build. + case Intrinsic::prefetch: // TODO(jfb): Use our own data-prefetch intrinsic instead. + return PNaClABIAllowDevIntrinsics; + } +} + +bool PNaClABIVerifyModule::isWhitelistedMetadata(const NamedMDNode *MD) { + return MD->getName().startswith("llvm.dbg.") && PNaClABIAllowDebugMetadata; +} + +bool PNaClABIVerifyModule::isWhitelistedExternal(const GlobalValue *GV) { + if (const Function *Func = dyn_cast<const Function>(GV)) { + if (Func->getName().equals("_start") || Func->isIntrinsic()) { + return true; + } + } + return false; +} + +static bool isPtrToIntOfGlobal(const Constant *C) { + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + return CE->getOpcode() == Instruction::PtrToInt && + isa<GlobalValue>(CE->getOperand(0)); + } + return false; +} + +// This checks for part of the normal form produced by FlattenGlobals. +static bool isSimpleElement(const Constant *C) { + // A SimpleElement is one of the following: + // 1) An i8 array literal or zeroinitializer: + // [SIZE x i8] c"DATA" + // [SIZE x i8] zeroinitializer + if (ArrayType *Ty = dyn_cast<ArrayType>(C->getType())) { + return Ty->getElementType()->isIntegerTy(8) && + (isa<ConstantAggregateZero>(C) || + isa<ConstantDataSequential>(C)); + } + // 2) A reference to a GlobalValue (a function or global variable) + // with an optional byte offset added to it (the addend). 
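+  // For illustration (with a hypothetical global @buf), the pieces accepted
+  // here look like:
+  //   [4 x i8] c"\01\02\03\04"
+  //   i32 ptrtoint ([4 x i8]* @buf to i32)
+  //   i32 add (i32 ptrtoint ([4 x i8]* @buf to i32), i32 2)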
+ if (C->getType()->isIntegerTy(32)) { + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) + return false; + // Without addend: ptrtoint (TYPE* @GLOBAL to i32) + if (isPtrToIntOfGlobal(CE)) + return true; + // With addend: add (i32 ptrtoint (TYPE* @GLOBAL to i32), i32 ADDEND) + if (CE->getOpcode() == Instruction::Add && + isPtrToIntOfGlobal(CE->getOperand(0)) && + isa<ConstantInt>(CE->getOperand(1))) + return true; + } + return false; +} + +// This checks for part of the normal form produced by FlattenGlobals. +static bool isCompoundElement(const Constant *C) { + const ConstantStruct *CS = dyn_cast<ConstantStruct>(C); + if (!CS || !CS->getType()->isPacked() || CS->getType()->hasName() || + CS->getNumOperands() <= 1) + return false; + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + if (!isSimpleElement(CS->getOperand(I))) + return false; + } + return true; +} + +static std::string getAttributesAsString(AttributeSet Attrs) { + std::string AttrsAsString; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + for (AttributeSet::iterator Attr = Attrs.begin(Slot), + E = Attrs.end(Slot); Attr != E; ++Attr) { + AttrsAsString += " "; + AttrsAsString += Attr->getAsString(); + } + } + return AttrsAsString; +} + +// This checks that the GlobalVariable has the normal form produced by +// the FlattenGlobals pass. +void PNaClABIVerifyModule::checkGlobalIsFlattened(const GlobalVariable *GV) { + if (!GV->hasInitializer()) { + Reporter->addError() << "Global variable " << GV->getName() + << " has no initializer (disallowed)\n"; + return; + } + const Constant *InitVal = GV->getInitializer(); + if (isSimpleElement(InitVal) || isCompoundElement(InitVal)) + return; + Reporter->addError() << "Global variable " << GV->getName() + << " has non-flattened initializer (disallowed): " + << *InitVal << "\n"; +} + +bool PNaClABIVerifyModule::runOnModule(Module &M) { + AllowedIntrinsics Intrinsics(&M.getContext()); + + if (!M.getModuleInlineAsm().empty()) { + Reporter->addError() << + "Module contains disallowed top-level inline assembly\n"; + } + + for (Module::const_global_iterator MI = M.global_begin(), ME = M.global_end(); + MI != ME; ++MI) { + checkGlobalIsFlattened(MI); + checkGlobalValueCommon(MI); + + if (MI->isThreadLocal()) { + Reporter->addError() << "Variable " << MI->getName() << + " has disallowed \"thread_local\" attribute\n"; + } + if (MI->isExternallyInitialized()) { + Reporter->addError() << "Variable " << MI->getName() << + " has disallowed \"externally_initialized\" attribute\n"; + } + } + + // No aliases allowed for now. + for (Module::alias_iterator MI = M.alias_begin(), + E = M.alias_end(); MI != E; ++MI) { + Reporter->addError() << "Variable " << MI->getName() << + " is an alias (disallowed)\n"; + } + + for (Module::const_iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) { + if (MI->isIntrinsic()) { + // Check intrinsics. + if (!Intrinsics.isAllowed(MI)) { + Reporter->addError() << "Function " << MI->getName() + << " is a disallowed LLVM intrinsic\n"; + } + } else { + // Check types of functions and their arguments. Not necessary + // for intrinsics, whose types are fixed anyway, and which have + // argument types that we disallow such as i8. 
+ if (!PNaClABITypeChecker::isValidFunctionType(MI->getFunctionType())) { + Reporter->addError() << "Function " << MI->getName() + << " has disallowed type: " + << PNaClABITypeChecker::getTypeName(MI->getFunctionType()) + << "\n"; + } + // This check is disabled in streaming mode because it would + // reject a function that is defined but not read in yet. + // Unfortunately this means we simply don't check this property + // when translating a pexe in the browser. + // TODO(mseaborn): Enforce this property in the bitcode reader. + if (!StreamingMode && MI->isDeclaration()) { + Reporter->addError() << "Function " << MI->getName() + << " is declared but not defined (disallowed)\n"; + } + if (!MI->getAttributes().isEmpty()) { + Reporter->addError() + << "Function " << MI->getName() << " has disallowed attributes:" + << getAttributesAsString(MI->getAttributes()) << "\n"; + } + if (MI->getCallingConv() != CallingConv::C) { + Reporter->addError() + << "Function " << MI->getName() + << " has disallowed calling convention: " + << MI->getCallingConv() << "\n"; + } + } + + checkGlobalValueCommon(MI); + + if (MI->hasGC()) { + Reporter->addError() << "Function " << MI->getName() << + " has disallowed \"gc\" attribute\n"; + } + // Knowledge of what function alignments are useful is + // architecture-specific and sandbox-specific, so PNaCl pexes + // should not be able to specify function alignment. + if (MI->getAlignment() != 0) { + Reporter->addError() << "Function " << MI->getName() << + " has disallowed \"align\" attribute\n"; + } + } + + // Check named metadata nodes + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + if (!isWhitelistedMetadata(I)) { + Reporter->addError() << "Named metadata node " << I->getName() + << " is disallowed\n"; + } + } + + Reporter->checkForFatalErrors(); + return false; +} + +// This method exists so that the passes can easily be run with opt -analyze. +// In this case the default constructor is used and we want to reset the error +// messages after each print (this is more of an issue for the FunctionPass +// than the ModulePass) +void PNaClABIVerifyModule::print(llvm::raw_ostream &O, const Module *M) const { + Reporter->printErrors(O); + Reporter->reset(); +} + +char PNaClABIVerifyModule::ID = 0; +INITIALIZE_PASS(PNaClABIVerifyModule, "verify-pnaclabi-module", + "Verify module for PNaCl", false, true) + +ModulePass *llvm::createPNaClABIVerifyModulePass( + PNaClABIErrorReporter *Reporter, bool StreamingMode) { + return new PNaClABIVerifyModule(Reporter, StreamingMode); +} diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index bb4f03bacc..3671b18d12 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -31,12 +31,21 @@ Module *llvm::ParseAssembly(MemoryBuffer *F, // If we are parsing into an existing module, do it. if (M) - return LLParser(F, SM, Err, M).Run() ? 0 : M; + // @LOCALMOD-BEGIN + if (LLParser(F, SM, Err, M).Run()) { + return 0; + } + else { + M->convertMetadataToLibraryList(); + return M; + } + // @LOCALMOD-END // Otherwise create a new module. 
OwningPtr<Module> M2(new Module(F->getBufferIdentifier(), Context)); if (LLParser(F, SM, Err, M2.get()).Run()) return 0; + M2->convertMetadataToLibraryList(); // @LOCALMOD return M2.take(); } diff --git a/lib/Bitcode/CMakeLists.txt b/lib/Bitcode/CMakeLists.txt index ff7e290cad..8969ec83f5 100644 --- a/lib/Bitcode/CMakeLists.txt +++ b/lib/Bitcode/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(Reader) add_subdirectory(Writer) +add_subdirectory(NaCl) diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt index af9936bbe8..415a33dfdf 100644 --- a/lib/Bitcode/LLVMBuild.txt +++ b/lib/Bitcode/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Reader Writer +subdirectories = Reader Writer NaCl [component_0] type = Group diff --git a/lib/Bitcode/Makefile b/lib/Bitcode/Makefile index 2d6b5ad1fe..cbaab3578c 100644 --- a/lib/Bitcode/Makefile +++ b/lib/Bitcode/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Reader Writer +PARALLEL_DIRS = Reader Writer NaCl include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/NaCl/CMakeLists.txt b/lib/Bitcode/NaCl/CMakeLists.txt new file mode 100644 index 0000000000..5a8b272bef --- /dev/null +++ b/lib/Bitcode/NaCl/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(Writer) +add_subdirectory(Reader) diff --git a/lib/Bitcode/NaCl/LLVMBuild.txt b/lib/Bitcode/NaCl/LLVMBuild.txt new file mode 100644 index 0000000000..a29928d2a0 --- /dev/null +++ b/lib/Bitcode/NaCl/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Bitcode/NaCl/LLVMBuild.txt ------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = Writer Reader + +[component_0] +type = Group +name = NaClBitcode +parent = Bitcode diff --git a/lib/Bitcode/NaCl/Makefile b/lib/Bitcode/NaCl/Makefile new file mode 100644 index 0000000000..5bbbc351a1 --- /dev/null +++ b/lib/Bitcode/NaCl/Makefile @@ -0,0 +1,14 @@ +##===- lib/Bitcode/NaCl/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+PARALLEL_DIRS = Writer Reader + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bitcode/NaCl/Reader/CMakeLists.txt b/lib/Bitcode/NaCl/Reader/CMakeLists.txt new file mode 100644 index 0000000000..9e4de723c1 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMNaClBitReader + NaClBitcodeHeader.cpp + NaClBitcodeReader.cpp + NaClBitstreamReader.cpp + ) + +add_dependencies(LLVMNaClBitReader intrinsics_gen) diff --git a/lib/Bitcode/NaCl/Reader/LLVMBuild.txt b/lib/Bitcode/NaCl/Reader/LLVMBuild.txt new file mode 100644 index 0000000000..acf354f5b5 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Bitcode/NaClReader/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClBitReader +parent = NaClBitcode +required_libraries = Core Support diff --git a/lib/Bitcode/NaCl/Reader/Makefile b/lib/Bitcode/NaCl/Reader/Makefile new file mode 100644 index 0000000000..92c75c29a4 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/Makefile @@ -0,0 +1,15 @@ +##===- lib/Bitcode/NaCl/Reader/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMNaClBitReader +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeHeader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeHeader.cpp new file mode 100644 index 0000000000..aa73b9cffa --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeHeader.cpp @@ -0,0 +1,261 @@ +//===- NaClBitcodeHeader.cpp ----------------------------------------------===// +// PNaCl bitcode header reader. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
+#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/StreamableMemoryObject.h"
+
+#include <limits>
+#include <cstring>
+#include <iomanip>
+
+using namespace llvm;
+
+NaClBitcodeHeaderField::NaClBitcodeHeaderField()
+    : ID(kInvalid), FType(kBufferType), Len(0), Data(0) {}
+
+NaClBitcodeHeaderField::NaClBitcodeHeaderField(Tag MyID, uint32_t MyValue)
+    : ID(MyID), FType(kUInt32Type), Len(4), Data(new uint8_t[4]) {
+  Data[0] = static_cast<uint8_t>(MyValue & 0xFF);
+  Data[1] = static_cast<uint8_t>((MyValue >> 8) & 0xFF);
+  Data[2] = static_cast<uint8_t>((MyValue >> 16) & 0xFF);
+  Data[3] = static_cast<uint8_t>((MyValue >> 24) & 0xFF);
+}
+
+uint32_t NaClBitcodeHeaderField::GetUInt32Value() const {
+  assert(FType == kUInt32Type && "Header field must be uint32");
+  return static_cast<uint32_t>(Data[0]) |
+         (static_cast<uint32_t>(Data[1]) << 8) |
+         (static_cast<uint32_t>(Data[2]) << 16) |
+         (static_cast<uint32_t>(Data[3]) << 24);
+}
+
+NaClBitcodeHeaderField::NaClBitcodeHeaderField(Tag MyID, size_t MyLen,
+                                               uint8_t *MyData)
+    : ID(MyID), FType(kBufferType), Len(MyLen), Data(new uint8_t[MyLen]) {
+  for (size_t i = 0; i < MyLen; ++i) {
+    Data[i] = MyData[i];
+  }
+}
+
+bool NaClBitcodeHeaderField::Write(uint8_t *Buf, size_t BufLen) const {
+  size_t FieldsLen = kTagLenSize + Len;
+  size_t PadLen = (WordSize - (FieldsLen & (WordSize-1))) & (WordSize-1);
+  // Ensure buffer is large enough and that length can be represented
+  // in 32 bits
+  if (BufLen < FieldsLen + PadLen ||
+      Len > std::numeric_limits<FixedSubfield>::max())
+    return false;
+
+  WriteFixedSubfield(EncodeTypedID(), Buf);
+  WriteFixedSubfield(static_cast<FixedSubfield>(Len),
+                     Buf + sizeof(FixedSubfield));
+  memcpy(Buf + kTagLenSize, Data, Len);
+  // Pad out to word alignment
+  if (PadLen) {
+    memset(Buf + FieldsLen, 0, PadLen);
+  }
+  return true;
+}
+
+bool NaClBitcodeHeaderField::Read(const uint8_t *Buf, size_t BufLen) {
+  if (BufLen < kTagLenSize)
+    return false;
+  FixedSubfield IdField;
+  ReadFixedSubfield(&IdField, Buf);
+  FixedSubfield LengthField;
+  ReadFixedSubfield(&LengthField, Buf + sizeof(FixedSubfield));
+  size_t Length = static_cast<size_t>(LengthField);
+  if (BufLen < kTagLenSize + Length)
+    return false;
+  if (Len != Length) {
+    // Need to reallocate data buffer.
+ if (Data) + delete[] Data; + Data = new uint8_t[Length]; + } + Len = Length; + DecodeTypedID(IdField, ID, FType); + memcpy(Data, Buf + kTagLenSize, Len); + return true; +} + +std::string NaClBitcodeHeaderField::Contents() const { + std::string buffer; + raw_string_ostream ss(buffer); + switch (ID) { + case kPNaClVersion: + ss << "PNaCl Version"; + break; + case kInvalid: + ss << "Invalid"; + break; + default: + report_fatal_error("PNaCl bitcode file contains unknown field tag"); + } + ss << ": "; + switch (FType) { + case kUInt32Type: + ss << GetUInt32Value(); + break; + case kBufferType: + ss << "["; + for (size_t i = 0; i < Len; ++i) { + if (i) + ss << " "; + ss << format("%02x", Data[i]); + } + ss << "]"; + break; + default: + report_fatal_error("PNaCL bitcode file contains unknown field type"); + } + return ss.str(); +} + +NaClBitcodeHeader::NaClBitcodeHeader() + : HeaderSize(0), UnsupportedMessage(), IsSupportedFlag(false), + IsReadableFlag(false), PNaClVersion(0) {} + +NaClBitcodeHeader::~NaClBitcodeHeader() { + for (std::vector<NaClBitcodeHeaderField *>::const_iterator + Iter = Fields.begin(), + IterEnd = Fields.end(); + Iter != IterEnd; ++Iter) { + delete *Iter; + } +} + +bool NaClBitcodeHeader::ReadPrefix(const unsigned char *BufPtr, + const unsigned char *BufEnd, + unsigned &NumFields, unsigned &NumBytes) { + // Must contain PEXE. + if (!isNaClBitcode(BufPtr, BufEnd)) + return true; + BufPtr += WordSize; + + // Read #Fields and number of bytes needed for the header. + if (BufPtr + WordSize > BufEnd) + return true; + NumFields = static_cast<unsigned>(BufPtr[0]) | + (static_cast<unsigned>(BufPtr[1]) << 8); + NumBytes = static_cast<unsigned>(BufPtr[2]) | + (static_cast<unsigned>(BufPtr[3]) << 8); + BufPtr += WordSize; + return false; +} + +bool NaClBitcodeHeader::ReadFields(const unsigned char *BufPtr, + const unsigned char *BufEnd, + unsigned NumFields, unsigned NumBytes) { + HeaderSize = NumBytes + (2 * WordSize); + + // Read in each field. 
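+  // The raw fields start after the 'PEXE' magic word and a prefix word
+  // holding NumFields and NumBytes (two bytes each), which ReadPrefix
+  // parsed; a typical header carries a single kPNaClVersion field whose
+  // 4-byte value is expected to be 1 (see InstallFields).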
+ for (size_t i = 0; i < NumFields; ++i) { + NaClBitcodeHeaderField *Field = new NaClBitcodeHeaderField(); + Fields.push_back(Field); + if (!Field->Read(BufPtr, BufEnd - BufPtr)) + return true; + size_t FieldSize = Field->GetTotalSize(); + BufPtr += FieldSize; + } + return false; +} + +bool NaClBitcodeHeader::Read(const unsigned char *&BufPtr, + const unsigned char *&BufEnd) { + unsigned NumFields; + unsigned NumBytes; + if (ReadPrefix(BufPtr, BufEnd, NumFields, NumBytes)) + return true; + BufPtr += 2 * WordSize; + + if (ReadFields(BufPtr, BufEnd, NumFields, NumBytes)) + return true; + BufPtr += NumBytes; + InstallFields(); + return false; +} + +bool NaClBitcodeHeader::Read(StreamableMemoryObject *Bytes) { + unsigned NumFields; + unsigned NumBytes; + { + unsigned char Buffer[2 * WordSize]; + if (Bytes->readBytes(0, sizeof(Buffer), Buffer, NULL) || + ReadPrefix(Buffer, Buffer + sizeof(Buffer), NumFields, NumBytes)) + return true; + } + uint8_t *Header = new uint8_t[NumBytes]; + bool failed = + Bytes->readBytes(2 * WordSize, NumBytes, Header, NULL) || + ReadFields(Header, Header + NumBytes, NumFields, NumBytes); + delete[] Header; + if (failed) + return true; + InstallFields(); + return false; +} + +NaClBitcodeHeaderField * +NaClBitcodeHeader::GetTaggedField(NaClBitcodeHeaderField::Tag ID) const { + for (std::vector<NaClBitcodeHeaderField *>::const_iterator + Iter = Fields.begin(), + IterEnd = Fields.end(); + Iter != IterEnd; ++Iter) { + if ((*Iter)->GetID() == ID) { + return *Iter; + } + } + return 0; +} + +NaClBitcodeHeaderField *NaClBitcodeHeader::GetField(size_t index) const { + if (index >= Fields.size()) + return 0; + return Fields[index]; +} + +NaClBitcodeHeaderField *GetPNaClVersionPtr(NaClBitcodeHeader *Header) { + if (NaClBitcodeHeaderField *Version = + Header->GetTaggedField(NaClBitcodeHeaderField::kPNaClVersion)) { + if (Version->GetType() == NaClBitcodeHeaderField::kUInt32Type) { + return Version; + } + } + return 0; +} + +void NaClBitcodeHeader::InstallFields() { + // Assume supported until contradicted. + bool UpdatedUnsupportedMessage = false; + IsSupportedFlag = true; + IsReadableFlag = true; + UnsupportedMessage = "Supported"; + PNaClVersion = 0; + if (NaClBitcodeHeaderField *Version = GetPNaClVersionPtr(this)) { + PNaClVersion = Version->GetUInt32Value(); + } + if (PNaClVersion != 1) { + IsSupportedFlag = false; + IsReadableFlag = false; + UnsupportedMessage = "Unsupported Version"; + UpdatedUnsupportedMessage = true; + } + if (Fields.size() != 1) { + IsSupportedFlag = false; + IsReadableFlag = false; + if (!UpdatedUnsupportedMessage) + UnsupportedMessage = "Unknown header field(s) found"; + } +} diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp new file mode 100644 index 0000000000..5f14a639ba --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp @@ -0,0 +1,2644 @@ +//===- NaClBitcodeReader.cpp ----------------------------------------------===// +// Internal NaClBitcodeReader implementation +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "NaClBitcodeReader" + +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "NaClBitcodeReader.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/AutoUpgrade.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DataStream.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +using namespace llvm; + +enum { + SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex +}; + +void NaClBitcodeReader::materializeForwardReferencedFunctions() { + while (!BlockAddrFwdRefs.empty()) { + Function *F = BlockAddrFwdRefs.begin()->first; + F->Materialize(); + } +} + +void NaClBitcodeReader::FreeState() { + if (BufferOwned) + delete Buffer; + Buffer = 0; + std::vector<Type*>().swap(TypeList); + ValueList.clear(); + + std::vector<BasicBlock*>().swap(FunctionBBs); + std::vector<Function*>().swap(FunctionsWithBodies); + DeferredFunctionInfo.clear(); + + assert(BlockAddrFwdRefs.empty() && "Unresolved blockaddress fwd references"); +} + +//===----------------------------------------------------------------------===// +// Helper functions to implement forward reference resolution, etc. +//===----------------------------------------------------------------------===// + +/// ConvertToString - Convert a string from a record into an std::string, return +/// true on failure. +template<typename StrTy> +static bool ConvertToString(ArrayRef<uint64_t> Record, unsigned Idx, + StrTy &Result) { + if (Idx > Record.size()) + return true; + + for (unsigned i = Idx, e = Record.size(); i != e; ++i) + Result += (char)Record[i]; + return false; +} + +static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { + switch (Val) { + default: // Map unknown/new linkages to external + case 0: return GlobalValue::ExternalLinkage; + case 1: return GlobalValue::WeakAnyLinkage; + case 2: return GlobalValue::AppendingLinkage; + case 3: return GlobalValue::InternalLinkage; + case 4: return GlobalValue::LinkOnceAnyLinkage; + case 5: return GlobalValue::DLLImportLinkage; + case 6: return GlobalValue::DLLExportLinkage; + case 7: return GlobalValue::ExternalWeakLinkage; + case 8: return GlobalValue::CommonLinkage; + case 9: return GlobalValue::PrivateLinkage; + case 10: return GlobalValue::WeakODRLinkage; + case 11: return GlobalValue::LinkOnceODRLinkage; + case 12: return GlobalValue::AvailableExternallyLinkage; + case 13: return GlobalValue::LinkerPrivateLinkage; + case 14: return GlobalValue::LinkerPrivateWeakLinkage; + case 15: return GlobalValue::LinkOnceODRAutoHideLinkage; + } +} + +static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) { + switch (Val) { + default: // Map unknown visibilities to default. 
+ case 0: return GlobalValue::DefaultVisibility; + case 1: return GlobalValue::HiddenVisibility; + case 2: return GlobalValue::ProtectedVisibility; + } +} + +static int GetDecodedCastOpcode(unsigned Val) { + switch (Val) { + default: return -1; + case naclbitc::CAST_TRUNC : return Instruction::Trunc; + case naclbitc::CAST_ZEXT : return Instruction::ZExt; + case naclbitc::CAST_SEXT : return Instruction::SExt; + case naclbitc::CAST_FPTOUI : return Instruction::FPToUI; + case naclbitc::CAST_FPTOSI : return Instruction::FPToSI; + case naclbitc::CAST_UITOFP : return Instruction::UIToFP; + case naclbitc::CAST_SITOFP : return Instruction::SIToFP; + case naclbitc::CAST_FPTRUNC : return Instruction::FPTrunc; + case naclbitc::CAST_FPEXT : return Instruction::FPExt; + case naclbitc::CAST_PTRTOINT: return Instruction::PtrToInt; + case naclbitc::CAST_INTTOPTR: return Instruction::IntToPtr; + case naclbitc::CAST_BITCAST : return Instruction::BitCast; + } +} +static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) { + switch (Val) { + default: return -1; + case naclbitc::BINOP_ADD: + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; + case naclbitc::BINOP_SUB: + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; + case naclbitc::BINOP_MUL: + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; + case naclbitc::BINOP_UDIV: return Instruction::UDiv; + case naclbitc::BINOP_SDIV: + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; + case naclbitc::BINOP_UREM: return Instruction::URem; + case naclbitc::BINOP_SREM: + return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem; + case naclbitc::BINOP_SHL: return Instruction::Shl; + case naclbitc::BINOP_LSHR: return Instruction::LShr; + case naclbitc::BINOP_ASHR: return Instruction::AShr; + case naclbitc::BINOP_AND: return Instruction::And; + case naclbitc::BINOP_OR: return Instruction::Or; + case naclbitc::BINOP_XOR: return Instruction::Xor; + } +} + +static AtomicRMWInst::BinOp GetDecodedRMWOperation(unsigned Val) { + switch (Val) { + default: return AtomicRMWInst::BAD_BINOP; + case naclbitc::RMW_XCHG: return AtomicRMWInst::Xchg; + case naclbitc::RMW_ADD: return AtomicRMWInst::Add; + case naclbitc::RMW_SUB: return AtomicRMWInst::Sub; + case naclbitc::RMW_AND: return AtomicRMWInst::And; + case naclbitc::RMW_NAND: return AtomicRMWInst::Nand; + case naclbitc::RMW_OR: return AtomicRMWInst::Or; + case naclbitc::RMW_XOR: return AtomicRMWInst::Xor; + case naclbitc::RMW_MAX: return AtomicRMWInst::Max; + case naclbitc::RMW_MIN: return AtomicRMWInst::Min; + case naclbitc::RMW_UMAX: return AtomicRMWInst::UMax; + case naclbitc::RMW_UMIN: return AtomicRMWInst::UMin; + } +} + +static AtomicOrdering GetDecodedOrdering(unsigned Val) { + switch (Val) { + case naclbitc::ORDERING_NOTATOMIC: return NotAtomic; + case naclbitc::ORDERING_UNORDERED: return Unordered; + case naclbitc::ORDERING_MONOTONIC: return Monotonic; + case naclbitc::ORDERING_ACQUIRE: return Acquire; + case naclbitc::ORDERING_RELEASE: return Release; + case naclbitc::ORDERING_ACQREL: return AcquireRelease; + default: // Map unknown orderings to sequentially-consistent. + case naclbitc::ORDERING_SEQCST: return SequentiallyConsistent; + } +} + +static SynchronizationScope GetDecodedSynchScope(unsigned Val) { + switch (Val) { + case naclbitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread; + default: // Map unknown scopes to cross-thread. 
+ case naclbitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread; + } +} + +static CallingConv::ID GetDecodedCallingConv(unsigned Val) { + switch (Val) { + default: + report_fatal_error("PNaCl bitcode contains invalid calling conventions."); + case naclbitc::C_CallingConv: return CallingConv::C; + } +} + +namespace llvm { +namespace { + /// @brief A class for maintaining the slot number definition + /// as a placeholder for the actual definition for forward constants defs. + class ConstantPlaceHolder : public ConstantExpr { + void operator=(const ConstantPlaceHolder &) LLVM_DELETED_FUNCTION; + public: + // allocate space for exactly one operand + void *operator new(size_t s) { + return User::operator new(s, 1); + } + explicit ConstantPlaceHolder(Type *Ty, LLVMContext& Context) + : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) { + Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); + } + + /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + static bool classof(const Value *V) { + return isa<ConstantExpr>(V) && + cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1; + } + + + /// Provide fast operand accessors + //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + }; +} + +// FIXME: can we inherit this from ConstantExpr? +template <> +struct OperandTraits<ConstantPlaceHolder> : + public FixedNumOperandTraits<ConstantPlaceHolder, 1> { +}; +} + + +void NaClBitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { + assert(V); + if (Idx == size()) { + push_back(V); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = ValuePtrs[Idx]; + if (OldV == 0) { + OldV = V; + return; + } + + // Handle constants and non-constants (e.g. instrs) differently for + // efficiency. + if (Constant *PHC = dyn_cast<Constant>(&*OldV)) { + ResolveConstants.push_back(std::make_pair(PHC, Idx)); + OldV = V; + } else { + // If there was a forward reference to this value, replace it. + Value *PrevVal = OldV; + OldV->replaceAllUsesWith(V); + delete PrevVal; + } +} + +void NaClBitcodeReaderValueList::AssignGlobalVar(GlobalVariable *GV, + unsigned Idx) { + assert(GV); + + if (Idx == size()) { + push_back(GV); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = ValuePtrs[Idx]; + if (OldV == 0) { + OldV = GV; + return; + } + + // If there was a forward reference to this value, replace it. + Value *PrevVal = OldV; + GlobalVariable *Placeholder = cast<GlobalVariable>(PrevVal); + Placeholder->replaceAllUsesWith( + ConstantExpr::getBitCast(GV, Placeholder->getType())); + Placeholder->eraseFromParent(); + ValuePtrs[Idx] = GV; +} + +Constant *NaClBitcodeReaderValueList::getConstantFwdRef(unsigned Idx, + Type *Ty) { + if (Idx >= size()) + resize(Idx + 1); + + if (Value *V = ValuePtrs[Idx]) { + assert(Ty == V->getType() && "Type mismatch in constant table!"); + return cast<Constant>(V); + } + + // Create and return a placeholder, which will later be RAUW'd. + Constant *C = new ConstantPlaceHolder(Ty, Context); + ValuePtrs[Idx] = C; + return C; +} + +Value *NaClBitcodeReaderValueList::getValueFwdRef(unsigned Idx) { + if (Idx >= size()) + return 0; + + if (Value *V = ValuePtrs[Idx]) + return V; + + return 0; +} + +bool NaClBitcodeReaderValueList::createValueFwdRef(unsigned Idx, Type *Ty) { + if (Idx >= size()) + resize(Idx + 1); + + // Return an error if this a duplicate definition of Idx. + if (ValuePtrs[Idx]) + return true; + + // No type specified, must be invalid reference. 
+ if (Ty == 0) + return true; + + // Create a placeholder, which will later be RAUW'd. + ValuePtrs[Idx] = new Argument(Ty); + return false; +} + +Constant *NaClBitcodeReaderValueList::getOrCreateGlobalVarRef( + unsigned Idx, Module *M) { + // First make sure the element for Idx is defined. + if (Idx >= size()) + resize(Idx + 1); + + // Now get its value (if applicable). + if (Value *V = ValuePtrs[Idx]) + return dyn_cast<Constant>(V); + + // Create a placeholder, which will later be RAUW'd. + Type *PlaceholderType = Type::getInt8Ty(Context); + + Constant *C = + new GlobalVariable(*M, PlaceholderType, false, + GlobalValue::ExternalLinkage, 0); + ValuePtrs[Idx] = C; + return C; +} + +/// ResolveConstantForwardRefs - Once all constants are read, this method bulk +/// resolves any forward references. The idea behind this is that we sometimes +/// get constants (such as large arrays) which reference *many* forward ref +/// constants. Replacing each of these causes a lot of thrashing when +/// building/reuniquing the constant. Instead of doing this, we look at all the +/// uses and rewrite all the place holders at once for any constant that uses +/// a placeholder. +void NaClBitcodeReaderValueList::ResolveConstantForwardRefs() { + // Sort the values by-pointer so that they are efficient to look up with a + // binary search. + std::sort(ResolveConstants.begin(), ResolveConstants.end()); + + SmallVector<Constant*, 64> NewOps; + + while (!ResolveConstants.empty()) { + Value *RealVal = operator[](ResolveConstants.back().second); + Constant *Placeholder = ResolveConstants.back().first; + ResolveConstants.pop_back(); + + // Loop over all users of the placeholder, updating them to reference the + // new value. If they reference more than one placeholder, update them all + // at once. + while (!Placeholder->use_empty()) { + Value::use_iterator UI = Placeholder->use_begin(); + User *U = *UI; + + // If the using object isn't uniqued, just update the operands. This + // handles instructions and initializers for global variables. + if (!isa<Constant>(U) || isa<GlobalValue>(U)) { + UI.getUse().set(RealVal); + continue; + } + + // Otherwise, we have a constant that uses the placeholder. Replace that + // constant with a new constant that has *all* placeholder uses updated. + Constant *UserC = cast<Constant>(U); + for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); + I != E; ++I) { + Value *NewOp; + if (!isa<ConstantPlaceHolder>(*I)) { + // Not a placeholder reference. + NewOp = *I; + } else if (*I == Placeholder) { + // Common case is that it just references this one placeholder. + NewOp = RealVal; + } else { + // Otherwise, look up the placeholder in ResolveConstants. + ResolveConstantsTy::iterator It = + std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(), + std::pair<Constant*, unsigned>(cast<Constant>(*I), + 0)); + assert(It != ResolveConstants.end() && It->first == *I); + NewOp = operator[](It->second); + } + + NewOps.push_back(cast<Constant>(NewOp)); + } + + // Make the new constant. 
+ Constant *NewC; + if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) { + NewC = ConstantArray::get(UserCA->getType(), NewOps); + } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) { + NewC = ConstantStruct::get(UserCS->getType(), NewOps); + } else if (isa<ConstantVector>(UserC)) { + NewC = ConstantVector::get(NewOps); + } else { + assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr."); + NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps); + } + + UserC->replaceAllUsesWith(NewC); + UserC->destroyConstant(); + NewOps.clear(); + } + + // Update all ValueHandles, they should be the only users at this point. + Placeholder->replaceAllUsesWith(RealVal); + delete Placeholder; + } +} + +Type *NaClBitcodeReader::getTypeByID(unsigned ID) { + // The type table size is always specified correctly. + if (ID >= TypeList.size()) + return 0; + + if (Type *Ty = TypeList[ID]) + return Ty; + + // If we have a forward reference, the only possible case is when it is to a + // named struct. Just create a placeholder for now. + return TypeList[ID] = StructType::create(Context); +} + + +//===----------------------------------------------------------------------===// +// Functions for parsing blocks from the bitcode file +//===----------------------------------------------------------------------===// + + +bool NaClBitcodeReader::ParseTypeTable() { + DEBUG(dbgs() << "-> ParseTypeTable\n"); + if (Stream.EnterSubBlock(naclbitc::TYPE_BLOCK_ID_NEW)) + return Error("Malformed block record"); + + bool result = ParseTypeTableBody(); + if (!result) + DEBUG(dbgs() << "<- ParseTypeTable\n"); + return result; +} + +bool NaClBitcodeReader::ParseTypeTableBody() { + if (!TypeList.empty()) + return Error("Multiple TYPE_BLOCKs found!"); + + SmallVector<uint64_t, 64> Record; + unsigned NumRecords = 0; + + SmallString<64> TypeName; + + // Read all the records for this type table. + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + Error("Error in the type table block"); + return true; + case NaClBitstreamEntry::EndBlock: + if (NumRecords != TypeList.size()) + return Error("Invalid type forward reference in TYPE_BLOCK"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + Type *ResultTy = 0; + switch (Stream.readRecord(Entry.ID, Record)) { + default: return Error("unknown type in type table"); + case naclbitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] + // TYPE_CODE_NUMENTRY contains a count of the number of types in the + // type list. This allows us to reserve space. 
+ if (Record.size() < 1) + return Error("Invalid TYPE_CODE_NUMENTRY record"); + TypeList.resize(Record[0]); + continue; + case naclbitc::TYPE_CODE_VOID: // VOID + ResultTy = Type::getVoidTy(Context); + break; + case naclbitc::TYPE_CODE_HALF: // HALF + ResultTy = Type::getHalfTy(Context); + break; + case naclbitc::TYPE_CODE_FLOAT: // FLOAT + ResultTy = Type::getFloatTy(Context); + break; + case naclbitc::TYPE_CODE_DOUBLE: // DOUBLE + ResultTy = Type::getDoubleTy(Context); + break; + case naclbitc::TYPE_CODE_X86_FP80: // X86_FP80 + ResultTy = Type::getX86_FP80Ty(Context); + break; + case naclbitc::TYPE_CODE_FP128: // FP128 + ResultTy = Type::getFP128Ty(Context); + break; + case naclbitc::TYPE_CODE_PPC_FP128: // PPC_FP128 + ResultTy = Type::getPPC_FP128Ty(Context); + break; + case naclbitc::TYPE_CODE_LABEL: // LABEL + ResultTy = Type::getLabelTy(Context); + break; + case naclbitc::TYPE_CODE_X86_MMX: // X86_MMX + ResultTy = Type::getX86_MMXTy(Context); + break; + case naclbitc::TYPE_CODE_INTEGER: // INTEGER: [width] + if (Record.size() < 1) + return Error("Invalid Integer type record"); + + ResultTy = IntegerType::get(Context, Record[0]); + break; + case naclbitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or + // [pointee type, address space] + if (Record.size() < 1) + return Error("Invalid POINTER type record"); + unsigned AddressSpace = 0; + if (Record.size() == 2) + AddressSpace = Record[1]; + ResultTy = getTypeByID(Record[0]); + if (ResultTy == 0) return Error("invalid element type in pointer type"); + ResultTy = PointerType::get(ResultTy, AddressSpace); + break; + } + case naclbitc::TYPE_CODE_FUNCTION_OLD: { + // FIXME: attrid is dead, remove it in LLVM 4.0 + // FUNCTION: [vararg, attrid, retty, paramty x N] + if (Record.size() < 3) + return Error("Invalid FUNCTION type record"); + SmallVector<Type*, 8> ArgTys; + for (unsigned i = 3, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + ArgTys.push_back(T); + else + break; + } + + ResultTy = getTypeByID(Record[2]); + if (ResultTy == 0 || ArgTys.size() < Record.size()-3) + return Error("invalid type in function type"); + + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } + case naclbitc::TYPE_CODE_FUNCTION: { + // FUNCTION: [vararg, retty, paramty x N] + if (Record.size() < 2) + return Error("Invalid FUNCTION type record"); + SmallVector<Type*, 8> ArgTys; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + ArgTys.push_back(T); + else + break; + } + + ResultTy = getTypeByID(Record[1]); + if (ResultTy == 0 || ArgTys.size() < Record.size()-2) + return Error("invalid type in function type"); + + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } + case naclbitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] + if (Record.size() < 1) + return Error("Invalid STRUCT type record"); + SmallVector<Type*, 8> EltTys; + for (unsigned i = 1, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + EltTys.push_back(T); + else + break; + } + if (EltTys.size() != Record.size()-1) + return Error("invalid type in struct type"); + ResultTy = StructType::get(Context, EltTys, Record[0]); + break; + } + case naclbitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] + if (ConvertToString(Record, 0, TypeName)) + return Error("Invalid STRUCT_NAME record"); + continue; + + case naclbitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] + if (Record.size() < 1) + return Error("Invalid STRUCT type record"); 
+ + if (NumRecords >= TypeList.size()) + return Error("invalid TYPE table"); + + // Check to see if this was forward referenced, if so fill in the temp. + StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]); + if (Res) { + Res->setName(TypeName); + TypeList[NumRecords] = 0; + } else // Otherwise, create a new struct. + Res = StructType::create(Context, TypeName); + TypeName.clear(); + + SmallVector<Type*, 8> EltTys; + for (unsigned i = 1, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + EltTys.push_back(T); + else + break; + } + if (EltTys.size() != Record.size()-1) + return Error("invalid STRUCT type record"); + Res->setBody(EltTys, Record[0]); + ResultTy = Res; + break; + } + case naclbitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] + if (Record.size() != 1) + return Error("Invalid OPAQUE type record"); + + if (NumRecords >= TypeList.size()) + return Error("invalid TYPE table"); + + // Check to see if this was forward referenced, if so fill in the temp. + StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]); + if (Res) { + Res->setName(TypeName); + TypeList[NumRecords] = 0; + } else // Otherwise, create a new struct with no body. + Res = StructType::create(Context, TypeName); + TypeName.clear(); + ResultTy = Res; + break; + } + case naclbitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid ARRAY type record"); + if ((ResultTy = getTypeByID(Record[1]))) + ResultTy = ArrayType::get(ResultTy, Record[0]); + else + return Error("Invalid ARRAY type element"); + break; + case naclbitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid VECTOR type record"); + if ((ResultTy = getTypeByID(Record[1]))) + ResultTy = VectorType::get(ResultTy, Record[0]); + else + return Error("Invalid ARRAY type element"); + break; + } + + if (NumRecords >= TypeList.size()) + return Error("invalid TYPE table"); + assert(ResultTy && "Didn't read a type?"); + assert(TypeList[NumRecords] == 0 && "Already read type?"); + TypeList[NumRecords++] = ResultTy; + } +} + +bool NaClBitcodeReader::ParseGlobalVars() { + if (Stream.EnterSubBlock(naclbitc::GLOBALVAR_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + + // True when processing a global variable. Stays true until all records + // are processed, and the global variable is created. + bool ProcessingGlobal = false; + // The alignment value defined for the global variable. + unsigned VarAlignment = 0; + // True if the variable is read-only. + bool VarIsConstant = false; + // The initializer for the global variable. + SmallVector<Constant *, 10> VarInit; + // The number of initializers needed for the global variable. + unsigned VarInitializersNeeded = 0; + unsigned FirstValueNo = ValueList.size(); + // The index of the next global variable. + unsigned NextValueNo = FirstValueNo; + // The number of expected global variable definitions. + unsigned NumGlobals = 0; + + // Read all global variable records. + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: + case NaClBitstreamEntry::Error: + return Error("Error in the global vars block"); + case NaClBitstreamEntry::EndBlock: + if (ProcessingGlobal || NumGlobals != (NextValueNo - FirstValueNo)) + return Error("Error in the global vars block"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. 
+ Record.clear(); + unsigned Bitcode = Stream.readRecord(Entry.ID, Record); + switch (Bitcode) { + default: return Error("Unknown global variable entry"); + case naclbitc::GLOBALVAR_VAR: + // Start the definition of a global variable. + if (ProcessingGlobal || Record.size() != 2) + return Error("Bad GLOBALVAR_VAR record"); + ProcessingGlobal = true; + VarAlignment = (1 << Record[0]) >> 1; + VarIsConstant = Record[1] != 0; + // Assume (by default) there is a single initializer. + VarInitializersNeeded = 1; + break; + case naclbitc::GLOBALVAR_COMPOUND: + // Global variable has multiple initializers. Changes the + // default number of initializers to the given value in + // Record[0]. + if (!ProcessingGlobal || !VarInit.empty() || + VarInitializersNeeded != 1 || Record.size() != 1) + return Error("Bad GLOBALVAR_COMPOUND record"); + VarInitializersNeeded = Record[0]; + break; + case naclbitc::GLOBALVAR_ZEROFILL: { + // Define an initializer that defines a sequence of zero-filled bytes. + if (!ProcessingGlobal || Record.size() != 1) + return Error("Bad GLOBALVAR_ZEROFILL record"); + Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Record[0]); + Constant *Zero = ConstantAggregateZero::get(Ty); + VarInit.push_back(Zero); + break; + } + case naclbitc::GLOBALVAR_DATA: { + // Defines an initializer defined by a sequence of byte values. + if (!ProcessingGlobal || Record.size() < 1) + return Error("Bad GLOBALVAR_DATA record"); + unsigned Size = Record.size(); + uint8_t *Buf = new uint8_t[Size]; + assert(Buf); + for (unsigned i = 0; i < Size; ++i) + Buf[i] = Record[i]; + Constant *Init = ConstantDataArray::get( + Context, ArrayRef<uint8_t>(Buf, Buf + Size)); + VarInit.push_back(Init); + delete[] Buf; + break; + } + case naclbitc::GLOBALVAR_RELOC: { + // Define a relocation initializer. + if (!ProcessingGlobal || Record.size() < 1 || Record.size() > 2) + return Error("Bad GLOBALVAR_RELOC record"); + Constant *BaseVal = + ValueList.getOrCreateGlobalVarRef(Record[0], TheModule); + if (BaseVal == 0) + return Error("Bad base value in GLOBALVAR_RELOC record"); + Type *IntPtrType = IntegerType::get(Context, 32); + Constant *Val = ConstantExpr::getPtrToInt(BaseVal, IntPtrType); + if (Record.size() == 2) { + uint32_t Addend = Record[1]; + Val = ConstantExpr::getAdd(Val, ConstantInt::get(IntPtrType, + Addend)); + } + VarInit.push_back(Val); + break; + } + case naclbitc::GLOBALVAR_COUNT: + if (Record.size() != 1 || NumGlobals != 0) + return Error("Invalid global count record"); + NumGlobals = Record[0]; + break; + } + + // If more initializers needed for global variable, continue processing. 
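+    // Illustrative (schematic) record sequence for one global built from
+    // two initializer pieces: GLOBALVAR_VAR {align, isconst},
+    // GLOBALVAR_COMPOUND {2}, GLOBALVAR_ZEROFILL {8}, GLOBALVAR_RELOC {id}.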
+ if (!ProcessingGlobal || VarInit.size() < VarInitializersNeeded) + continue; + + Constant *Init = 0; + switch (VarInit.size()) { + case 0: + return Error("No initializer for global variable in global vars block"); + case 1: + Init = VarInit[0]; + break; + default: + Init = ConstantStruct::getAnon(Context, VarInit, true); + break; + } + GlobalVariable *GV = new GlobalVariable( + *TheModule, Init->getType(), VarIsConstant, + GlobalValue::InternalLinkage, Init, ""); + GV->setAlignment(VarAlignment); + ValueList.AssignGlobalVar(GV, NextValueNo); + ++NextValueNo; + ProcessingGlobal = false; + VarAlignment = 0; + VarIsConstant = false; + VarInitializersNeeded = 0; + VarInit.clear(); + } +} + +bool NaClBitcodeReader::ParseValueSymbolTable() { + DEBUG(dbgs() << "-> ParseValueSymbolTable\n"); + if (Stream.EnterSubBlock(naclbitc::VALUE_SYMTAB_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + return Error("malformed value symbol table block"); + case NaClBitstreamEntry::EndBlock: + DEBUG(dbgs() << "<- ParseValueSymbolTable\n"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: unknown type. + break; + case naclbitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_ENTRY record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return Error("Invalid Value ID in VST_ENTRY record"); + Value *V = ValueList[ValueID]; + + V->setName(StringRef(ValueName.data(), ValueName.size())); + ValueName.clear(); + break; + } + case naclbitc::VST_CODE_BBENTRY: { + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_BBENTRY record"); + BasicBlock *BB = getBasicBlock(Record[0]); + if (BB == 0) + return Error("Invalid BB ID in VST_BBENTRY record"); + + BB->setName(StringRef(ValueName.data(), ValueName.size())); + ValueName.clear(); + break; + } + } + } +} + +/// ResolveAliasInits - Resolve all of the initializers for aliases that we can. +bool NaClBitcodeReader::ResolveAliasInits() { + std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist; + + AliasInitWorklist.swap(AliasInits); + + while (!AliasInitWorklist.empty()) { + unsigned ValID = AliasInitWorklist.back().second; + if (ValID >= ValueList.size()) { + AliasInits.push_back(AliasInitWorklist.back()); + } else { + if (Constant *C = dyn_cast<Constant>(ValueList[ValID])) + AliasInitWorklist.back().first->setAliasee(C); + else + return Error("Alias initializer is not a constant!"); + } + AliasInitWorklist.pop_back(); + } + return false; +} + +static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) { + SmallVector<uint64_t, 8> Words(Vals.size()); + std::transform(Vals.begin(), Vals.end(), Words.begin(), + NaClDecodeSignRotatedValue); + + return APInt(TypeBits, Words); +} + +bool NaClBitcodeReader::ParseConstants() { + DEBUG(dbgs() << "-> ParseConstants\n"); + if (Stream.EnterSubBlock(naclbitc::CONSTANTS_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + + // Read all the records for this value table. 
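+  // Constants arrive grouped by type: a CST_CODE_SETTYPE record switches
+  // CurTy, and the records that follow it (e.g. CST_CODE_INTEGER) each
+  // define one constant of that type.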
+ Type *CurTy = Type::getInt32Ty(Context); + unsigned NextCstNo = ValueList.size(); + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + return Error("malformed block record in AST file"); + case NaClBitstreamEntry::EndBlock: + if (NextCstNo != ValueList.size()) + return Error("Invalid constant reference!"); + + // Once all the constants have been read, go through and resolve forward + // references. + ValueList.ResolveConstantForwardRefs(); + DEBUG(dbgs() << "<- ParseConstants\n"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + Value *V = 0; + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: // Default behavior: unknown constant + case naclbitc::CST_CODE_UNDEF: // UNDEF + V = UndefValue::get(CurTy); + break; + case naclbitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] + if (Record.empty()) + return Error("Malformed CST_SETTYPE record"); + if (Record[0] >= TypeList.size()) + return Error("Invalid Type ID in CST_SETTYPE record"); + CurTy = TypeList[Record[0]]; + continue; // Skip the ValueList manipulation. + case naclbitc::CST_CODE_NULL: // NULL + V = Constant::getNullValue(CurTy); + break; + case naclbitc::CST_CODE_INTEGER: // INTEGER: [intval] + if (!CurTy->isIntegerTy() || Record.empty()) + return Error("Invalid CST_INTEGER record"); + V = ConstantInt::get(CurTy, NaClDecodeSignRotatedValue(Record[0])); + break; + case naclbitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] + if (!CurTy->isIntegerTy() || Record.empty()) + return Error("Invalid WIDE_INTEGER record"); + + APInt VInt = ReadWideAPInt(Record, + cast<IntegerType>(CurTy)->getBitWidth()); + V = ConstantInt::get(Context, VInt); + + break; + } + case naclbitc::CST_CODE_FLOAT: { // FLOAT: [fpval] + if (Record.empty()) + return Error("Invalid FLOAT record"); + if (CurTy->isHalfTy()) + V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf, + APInt(16, (uint16_t)Record[0]))); + else if (CurTy->isFloatTy()) + V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle, + APInt(32, (uint32_t)Record[0]))); + else if (CurTy->isDoubleTy()) + V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble, + APInt(64, Record[0]))); + else if (CurTy->isX86_FP80Ty()) { + // Bits are not stored the same way as a normal i80 APInt, compensate. 
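+        // (The record packs bits [79:16] of the value in Record[0] and bits
+        // [15:0] in Record[1]; the APInt constructor below takes the low
+        // 64 bits first, then the high 16 bits.)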
+        uint64_t Rearrange[2];
+        Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
+        Rearrange[1] = Record[0] >> 48;
+        V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended,
+                                             APInt(80, Rearrange)));
+      } else if (CurTy->isFP128Ty())
+        V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad,
+                                             APInt(128, Record)));
+      else if (CurTy->isPPC_FP128Ty())
+        V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble,
+                                             APInt(128, Record)));
+      else
+        V = UndefValue::get(CurTy);
+      break;
+    }
+
+    case naclbitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
+      if (Record.empty())
+        return Error("Invalid CST_AGGREGATE record");
+
+      unsigned Size = Record.size();
+      SmallVector<Constant*, 16> Elts;
+
+      if (StructType *STy = dyn_cast<StructType>(CurTy)) {
+        for (unsigned i = 0; i != Size; ++i)
+          Elts.push_back(ValueList.getConstantFwdRef(Record[i],
+                                                     STy->getElementType(i)));
+        V = ConstantStruct::get(STy, Elts);
+      } else if (ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+        Type *EltTy = ATy->getElementType();
+        for (unsigned i = 0; i != Size; ++i)
+          Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+        V = ConstantArray::get(ATy, Elts);
+      } else if (VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+        Type *EltTy = VTy->getElementType();
+        for (unsigned i = 0; i != Size; ++i)
+          Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+        V = ConstantVector::get(Elts);
+      } else {
+        V = UndefValue::get(CurTy);
+      }
+      break;
+    }
+    case naclbitc::CST_CODE_STRING:    // STRING: [values]
+    case naclbitc::CST_CODE_CSTRING: { // CSTRING: [values]
+      if (Record.empty())
+        return Error("Invalid CST_STRING record");
+
+      SmallString<16> Elts(Record.begin(), Record.end());
+      V = ConstantDataArray::getString(Context, Elts,
+                                       BitCode == naclbitc::CST_CODE_CSTRING);
+      break;
+    }
+    case naclbitc::CST_CODE_DATA: {// DATA: [n x value]
+      if (Record.empty())
+        return Error("Invalid CST_DATA record");
+
+      Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
+      unsigned Size = Record.size();
+
+      if (EltTy->isIntegerTy(8)) {
+        SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
+        if (isa<VectorType>(CurTy))
+          V = ConstantDataVector::get(Context, Elts);
+        else
+          V = ConstantDataArray::get(Context, Elts);
+      } else if (EltTy->isIntegerTy(16)) {
+        SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end());
+        if (isa<VectorType>(CurTy))
+          V = ConstantDataVector::get(Context, Elts);
+        else
+          V = ConstantDataArray::get(Context, Elts);
+      } else if (EltTy->isIntegerTy(32)) {
+        SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end());
+        if (isa<VectorType>(CurTy))
+          V = ConstantDataVector::get(Context, Elts);
+        else
+          V = ConstantDataArray::get(Context, Elts);
+      } else if (EltTy->isIntegerTy(64)) {
+        SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end());
+        if (isa<VectorType>(CurTy))
+          V = ConstantDataVector::get(Context, Elts);
+        else
+          V = ConstantDataArray::get(Context, Elts);
+      } else if (EltTy->isFloatTy()) {
+        SmallVector<float, 16> Elts(Size);
+        std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat);
+        if (isa<VectorType>(CurTy))
+          V = ConstantDataVector::get(Context, Elts);
+        else
+          V = ConstantDataArray::get(Context, Elts);
+      } else if (EltTy->isDoubleTy()) {
+        SmallVector<double, 16> Elts(Size);
+        std::transform(Record.begin(), Record.end(), Elts.begin(),
+                       BitsToDouble);
+        if (isa<VectorType>(CurTy))
+          V = ConstantDataVector::get(Context, Elts);
+        else
+          V = ConstantDataArray::get(Context, Elts);
+      } else {
+        return Error("Unknown element type in CE_DATA");
+      }
+      break;
+    }
+
+    case naclbitc::CST_CODE_CE_BINOP: {  // CE_BINOP: [opcode, opval, opval]
+      if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+      int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+      if (Opc < 0) {
+        V = UndefValue::get(CurTy);  // Unknown binop.
+      } else {
+        Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+        Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
+        unsigned Flags = 0;
+        if (Record.size() >= 4) {
+          if (Opc == Instruction::Add ||
+              Opc == Instruction::Sub ||
+              Opc == Instruction::Mul ||
+              Opc == Instruction::Shl) {
+            if (Record[3] & (1 << naclbitc::OBO_NO_SIGNED_WRAP))
+              Flags |= OverflowingBinaryOperator::NoSignedWrap;
+            if (Record[3] & (1 << naclbitc::OBO_NO_UNSIGNED_WRAP))
+              Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+          } else if (Opc == Instruction::SDiv ||
+                     Opc == Instruction::UDiv ||
+                     Opc == Instruction::LShr ||
+                     Opc == Instruction::AShr) {
+            if (Record[3] & (1 << naclbitc::PEO_EXACT))
+              Flags |= SDivOperator::IsExact;
+          }
+        }
+        V = ConstantExpr::get(Opc, LHS, RHS, Flags);
+      }
+      break;
+    }
+    case naclbitc::CST_CODE_CE_CAST: {  // CE_CAST: [opcode, opty, opval]
+      if (Record.size() < 3) return Error("Invalid CE_CAST record");
+      int Opc = GetDecodedCastOpcode(Record[0]);
+      if (Opc < 0) {
+        V = UndefValue::get(CurTy);  // Unknown cast.
+      } else {
+        Type *OpTy = getTypeByID(Record[1]);
+        if (!OpTy) return Error("Invalid CE_CAST record");
+        Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy);
+        V = ConstantExpr::getCast(Opc, Op, CurTy);
+      }
+      break;
+    }
+    case naclbitc::CST_CODE_CE_INBOUNDS_GEP:
+    case naclbitc::CST_CODE_CE_GEP: {  // CE_GEP: [n x operands]
+      if (Record.size() & 1) return Error("Invalid CE_GEP record");
+      SmallVector<Constant*, 16> Elts;
+      for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
+        Type *ElTy = getTypeByID(Record[i]);
+        if (!ElTy) return Error("Invalid CE_GEP record");
+        Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy));
+      }
+      ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
+      V = ConstantExpr::getGetElementPtr(Elts[0], Indices,
+                                         BitCode ==
+                                           naclbitc::CST_CODE_CE_INBOUNDS_GEP);
+      break;
+    }
+    case naclbitc::CST_CODE_CE_SELECT:  // CE_SELECT: [opval#, opval#, opval#]
+      if (Record.size() < 3) return Error("Invalid CE_SELECT record");
+      V = ConstantExpr::getSelect(
+                          ValueList.getConstantFwdRef(Record[0],
+                                                      Type::getInt1Ty(Context)),
+                          ValueList.getConstantFwdRef(Record[1],CurTy),
+                          ValueList.getConstantFwdRef(Record[2],CurTy));
+      break;
+    case naclbitc::CST_CODE_CE_EXTRACTELT: {
+      // CE_EXTRACTELT: [opty, opval, opval]
+      if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record");
+      VectorType *OpTy =
+        dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
+      if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record");
+      Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+      Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
+                                                  Type::getInt32Ty(Context));
+      V = ConstantExpr::getExtractElement(Op0, Op1);
+      break;
+    }
+    case naclbitc::CST_CODE_CE_INSERTELT: {// CE_INSERTELT: [opval, opval, opval]
+      VectorType *OpTy = dyn_cast<VectorType>(CurTy);
+      if (Record.size() < 3 || OpTy == 0)
+        return Error("Invalid CE_INSERTELT record");
+      Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
+      Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
+                                                  OpTy->getElementType());
+      Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
+                                                  Type::getInt32Ty(Context));
+      V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
+      break;
+    }
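+    // The vector and compare constant-expression records below likewise fetch
+    // their operands through getConstantFwdRef, so they may reference constants
+    // that have not been read yet.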
+ case naclbitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] + VectorType *OpTy = dyn_cast<VectorType>(CurTy); + if (Record.size() < 3 || OpTy == 0) + return Error("Invalid CE_SHUFFLEVEC record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); + Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + OpTy->getNumElements()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); + V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); + break; + } + case naclbitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] + VectorType *RTy = dyn_cast<VectorType>(CurTy); + VectorType *OpTy = + dyn_cast_or_null<VectorType>(getTypeByID(Record[0])); + if (Record.size() < 4 || RTy == 0 || OpTy == 0) + return Error("Invalid CE_SHUFVEC_EX record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); + Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + RTy->getNumElements()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); + V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); + break; + } + case naclbitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] + if (Record.size() < 4) return Error("Invalid CE_CMP record"); + Type *OpTy = getTypeByID(Record[0]); + if (OpTy == 0) return Error("Invalid CE_CMP record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); + + if (OpTy->isFPOrFPVectorTy()) + V = ConstantExpr::getFCmp(Record[3], Op0, Op1); + else + V = ConstantExpr::getICmp(Record[3], Op0, Op1); + break; + } + // This maintains backward compatibility, pre-asm dialect keywords. + // FIXME: Remove with the 4.0 release. + case naclbitc::CST_CODE_INLINEASM_OLD: { + if (Record.size() < 2) return Error("Invalid INLINEASM record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[0] & 1; + bool IsAlignStack = Record[0] >> 1; + unsigned AsmStrSize = Record[1]; + if (2+AsmStrSize >= Record.size()) + return Error("Invalid INLINEASM record"); + unsigned ConstStrSize = Record[2+AsmStrSize]; + if (3+AsmStrSize+ConstStrSize > Record.size()) + return Error("Invalid INLINEASM record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[2+i]; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[3+AsmStrSize+i]; + PointerType *PTy = cast<PointerType>(CurTy); + V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()), + AsmStr, ConstrStr, HasSideEffects, IsAlignStack); + break; + } + // This version adds support for the asm dialect keywords (e.g., + // inteldialect). 
+ case naclbitc::CST_CODE_INLINEASM: { + if (Record.size() < 2) return Error("Invalid INLINEASM record"); + std::string AsmStr, ConstrStr; + bool HasSideEffects = Record[0] & 1; + bool IsAlignStack = (Record[0] >> 1) & 1; + unsigned AsmDialect = Record[0] >> 2; + unsigned AsmStrSize = Record[1]; + if (2+AsmStrSize >= Record.size()) + return Error("Invalid INLINEASM record"); + unsigned ConstStrSize = Record[2+AsmStrSize]; + if (3+AsmStrSize+ConstStrSize > Record.size()) + return Error("Invalid INLINEASM record"); + + for (unsigned i = 0; i != AsmStrSize; ++i) + AsmStr += (char)Record[2+i]; + for (unsigned i = 0; i != ConstStrSize; ++i) + ConstrStr += (char)Record[3+AsmStrSize+i]; + PointerType *PTy = cast<PointerType>(CurTy); + V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()), + AsmStr, ConstrStr, HasSideEffects, IsAlignStack, + InlineAsm::AsmDialect(AsmDialect)); + break; + } + case naclbitc::CST_CODE_BLOCKADDRESS:{ + if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record"); + Type *FnTy = getTypeByID(Record[0]); + if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record"); + Function *Fn = + dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy)); + if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record"); + + // If the function is already parsed we can insert the block address right + // away. + if (!Fn->empty()) { + Function::iterator BBI = Fn->begin(), BBE = Fn->end(); + for (size_t I = 0, E = Record[2]; I != E; ++I) { + if (BBI == BBE) + return Error("Invalid blockaddress block #"); + ++BBI; + } + V = BlockAddress::get(Fn, BBI); + } else { + // Otherwise insert a placeholder and remember it so it can be inserted + // when the function is parsed. + GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(), + Type::getInt8Ty(Context), + false, GlobalValue::InternalLinkage, + 0, ""); + BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef)); + V = FwdRef; + } + break; + } + } + + ValueList.AssignValue(V, NextCstNo); + ++NextCstNo; + } +} + +bool NaClBitcodeReader::ParseUseLists() { + DEBUG(dbgs() << "-> ParseUseLists\n"); + if (Stream.EnterSubBlock(naclbitc::USELIST_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + + // Read all the records. + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + return Error("malformed use list block"); + case NaClBitstreamEntry::EndBlock: + DEBUG(dbgs() << "<- ParseUseLists\n"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a use list record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: unknown type. + break; + case naclbitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. + unsigned RecordLength = Record.size(); + if (RecordLength < 1) + return Error ("Invalid UseList reader!"); + UseListRecords.push_back(Record); + break; + } + } + } +} + +/// RememberAndSkipFunctionBody - When we see the block for a function body, +/// remember where it is and then skip it. This lets us lazily deserialize the +/// functions. +bool NaClBitcodeReader::RememberAndSkipFunctionBody() { + DEBUG(dbgs() << "-> RememberAndSkipFunctionBody\n"); + // Get the function we are talking about. 
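+  // Bodies appear in the same order as their prototypes; ParseModule reverses
+  // FunctionsWithBodies before the first body is read, so the function whose
+  // body starts here is always at the back of the list.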
+ if (FunctionsWithBodies.empty()) + return Error("Insufficient function protos"); + + Function *Fn = FunctionsWithBodies.back(); + FunctionsWithBodies.pop_back(); + + // Save the current stream state. + uint64_t CurBit = Stream.GetCurrentBitNo(); + DeferredFunctionInfo[Fn] = CurBit; + + // Skip over the function block for now. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + DEBUG(dbgs() << "<- RememberAndSkipFunctionBody\n"); + return false; +} + +bool NaClBitcodeReader::GlobalCleanup() { + // Patch the initializers for globals and aliases up. + ResolveAliasInits(); + + if (!AliasInits.empty()) + return Error("Malformed Alias Initializer"); + + // Look for intrinsic functions which need to be upgraded at some point + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + Function *NewFn; + if (UpgradeIntrinsicFunction(FI, NewFn)) + UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); + } + + // Look for global variables which need to be renamed. + for (Module::global_iterator + GI = TheModule->global_begin(), GE = TheModule->global_end(); + GI != GE; ++GI) + UpgradeGlobalVariable(GI); + std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits); + return false; +} + +bool NaClBitcodeReader::ParseModule(bool Resume) { + DEBUG(dbgs() << "-> ParseModule\n"); + if (Resume) + Stream.JumpToBit(NextUnreadBit); + else if (Stream.EnterSubBlock(naclbitc::MODULE_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector<uint64_t, 64> Record; + std::vector<std::string> SectionTable; + std::vector<std::string> GCTable; + + // Read all the records for this module. + while (1) { + NaClBitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + Error("malformed module block"); + return true; + case NaClBitstreamEntry::EndBlock: + DEBUG(dbgs() << "<- ParseModule\n"); + return GlobalCleanup(); + + case NaClBitstreamEntry::SubBlock: + switch (Entry.ID) { + default: // Skip unknown content. + DEBUG(dbgs() << "Skip unknown context\n"); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case naclbitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case naclbitc::TYPE_BLOCK_ID_NEW: + if (ParseTypeTable()) + return true; + break; + case naclbitc::GLOBALVAR_BLOCK_ID: + if (ParseGlobalVars()) + return true; + break; + case naclbitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) + return true; + SeenValueSymbolTable = true; + break; + case naclbitc::CONSTANTS_BLOCK_ID: + if (ParseConstants() || ResolveAliasInits()) + return true; + break; + case naclbitc::FUNCTION_BLOCK_ID: + // If this is the first function body we've seen, reverse the + // FunctionsWithBodies list. + if (!SeenFirstFunctionBody) { + std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); + if (GlobalCleanup()) + return true; + SeenFirstFunctionBody = true; + } + + if (RememberAndSkipFunctionBody()) + return true; + + // For streaming bitcode, suspend parsing when we reach the function + // bodies. Subsequent materialization calls will resume it when + // necessary. For streaming, the function bodies must be at the end of + // the bitcode. If the bitcode file is old, the symbol table will be + // at the end instead and will not have been seen yet. In this case, + // just finish the parse now. 
+ if (LazyStreamer && SeenValueSymbolTable) { + NextUnreadBit = Stream.GetCurrentBitNo(); + DEBUG(dbgs() << "<- ParseModule\n"); + return false; + } + break; + case naclbitc::USELIST_BLOCK_ID: + if (ParseUseLists()) + return true; + break; + } + continue; + + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + + // Read a record. + switch (Stream.readRecord(Entry.ID, Record)) { + default: break; // Default behavior, ignore unknown content. + case naclbitc::MODULE_CODE_VERSION: { // VERSION: [version#] + if (Record.size() < 1) + return Error("Malformed MODULE_CODE_VERSION"); + // Only version #0 and #1 are supported so far. + unsigned module_version = Record[0]; + switch (module_version) { + default: return Error("Unknown bitstream version!"); + case 0: + UseRelativeIDs = false; + break; + case 1: + UseRelativeIDs = true; + break; + } + break; + } + case naclbitc::MODULE_CODE_ASM: { // ASM: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_ASM record"); + TheModule->setModuleInlineAsm(S); + break; + } + case naclbitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] + // FIXME: Remove in 4.0. + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_DEPLIB record"); + // Ignore value. + break; + } + case naclbitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_SECTIONNAME record"); + SectionTable.push_back(S); + break; + } + case naclbitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_GCNAME record"); + GCTable.push_back(S); + break; + } + // GLOBALVAR: [pointer type, isconst, initid, + // linkage, alignment, section, visibility, threadlocal, + // unnamed_addr] + case naclbitc::MODULE_CODE_GLOBALVAR: + return Error("Invalid MODULE_CODE_GLOBALVAR record"); + // FUNCTION: [type, callingconv, isproto, linkage] + case naclbitc::MODULE_CODE_FUNCTION: { + if (Record.size() < 4) + return Error("Invalid MODULE_CODE_FUNCTION record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record"); + if (!Ty->isPointerTy()) + return Error("Function not a pointer type!"); + FunctionType *FTy = + dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType()); + if (!FTy) + return Error("Function not a pointer to function type!"); + + Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, + "", TheModule); + + Func->setCallingConv(GetDecodedCallingConv(Record[1])); + bool isProto = Record[2]; + Func->setLinkage(GetDecodedLinkage(Record[3])); + ValueList.push_back(Func); + + // If this is a function with a body, remember the prototype we are + // creating now, so that we can match up the body with them later. + if (!isProto) { + FunctionsWithBodies.push_back(Func); + if (LazyStreamer) DeferredFunctionInfo[Func] = 0; + } + break; + } + // ALIAS: [alias type, aliasee val#, linkage] + // ALIAS: [alias type, aliasee val#, linkage, visibility] + case naclbitc::MODULE_CODE_ALIAS: { + if (Record.size() < 3) + return Error("Invalid MODULE_ALIAS record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid MODULE_ALIAS record"); + if (!Ty->isPointerTy()) + return Error("Function not a pointer type!"); + + GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), + "", 0, TheModule); + // Old bitcode files didn't have visibility field. 
+ if (Record.size() > 3) + NewGA->setVisibility(GetDecodedVisibility(Record[3])); + ValueList.push_back(NewGA); + AliasInits.push_back(std::make_pair(NewGA, Record[1])); + break; + } + /// MODULE_CODE_PURGEVALS: [numvals] + case naclbitc::MODULE_CODE_PURGEVALS: + // Trim down the value list to the specified size. + if (Record.size() < 1 || Record[0] > ValueList.size()) + return Error("Invalid MODULE_PURGEVALS record"); + ValueList.shrinkTo(Record[0]); + break; + } + Record.clear(); + } +} + +bool NaClBitcodeReader::ParseBitcodeInto(Module *M) { + TheModule = 0; + + // PNaCl does not support different DataLayouts in pexes, so we + // implicitly set the DataLayout to the following default. + // + // This is not usually needed by the backend, but it might be used + // by IR passes that the PNaCl translator runs. We set this in the + // reader rather than in pnacl-llc so that 'opt' will also use the + // correct DataLayout if it is run on a pexe. + M->setDataLayout("e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" + "f32:32:32-f64:64:64-p:32:32:32-v128:32:32"); + + if (InitStream()) return true; + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + if (Stream.AtEndOfStream()) + return false; + + NaClBitstreamEntry Entry = + Stream.advance(NaClBitstreamCursor::AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + Error("malformed module file"); + return true; + case NaClBitstreamEntry::EndBlock: + return false; + + case NaClBitstreamEntry::SubBlock: + switch (Entry.ID) { + case naclbitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case naclbitc::MODULE_BLOCK_ID: + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule(false)) + return true; + if (LazyStreamer) return false; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + continue; + case NaClBitstreamEntry::Record: + // There should be no records in the top-level of blocks. + + // The ranlib in Xcode 4 will align archive members by appending newlines + // to the end of them. If this file size is a multiple of 4 but not 8, we + // have to read and ignore these final 4 bytes :-( + if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && + Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && + Stream.AtEndOfStream()) + return false; + + return Error("Invalid record at top-level"); + } + } +} + +/// ParseFunctionBody - Lazily parse the specified function body block. +bool NaClBitcodeReader::ParseFunctionBody(Function *F) { + DEBUG(dbgs() << "-> ParseFunctionBody\n"); + if (Stream.EnterSubBlock(naclbitc::FUNCTION_BLOCK_ID)) + return Error("Malformed block record"); + + InstructionList.clear(); + unsigned ModuleValueListSize = ValueList.size(); + + // Add all the function arguments to the value table. + for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ValueList.push_back(I); + + unsigned NextValueNo = ValueList.size(); + BasicBlock *CurBB = 0; + unsigned CurBBNo = 0; + + // Read all the records. 
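+  // Each record below decodes to at most one instruction.  Operand IDs are
+  // absolute value indices, or relative to NextValueNo when the module header
+  // selected the relative encoding (UseRelativeIDs).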
+ SmallVector<uint64_t, 64> Record; + while (1) { + NaClBitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + return Error("Bitcode error in function block"); + case NaClBitstreamEntry::EndBlock: + goto OutOfRecordLoop; + + case NaClBitstreamEntry::SubBlock: + switch (Entry.ID) { + default: // Skip unknown content. + dbgs() << "default skip block\n"; + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case naclbitc::CONSTANTS_BLOCK_ID: + if (ParseConstants()) + return true; + NextValueNo = ValueList.size(); + break; + case naclbitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) + return true; + break; + } + continue; + + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + Instruction *I = 0; + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: // Default behavior: reject + return Error("Unknown instruction"); + case naclbitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] + if (Record.size() < 1 || Record[0] == 0) + return Error("Invalid DECLAREBLOCKS record"); + // Create all the basic blocks for the function. + FunctionBBs.resize(Record[0]); + for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) + FunctionBBs[i] = BasicBlock::Create(Context, "", F); + CurBB = FunctionBBs[0]; + continue; + + case naclbitc::FUNC_CODE_INST_BINOP: { + // BINOP: [opval, opval, opcode[, flags]] + unsigned OpNum = 0; + Value *LHS, *RHS; + if (popValue(Record, &OpNum, NextValueNo, &LHS) || + popValue(Record, &OpNum, NextValueNo, &RHS) || + OpNum+1 > Record.size()) + return Error("Invalid BINOP record"); + + int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); + if (Opc == -1) return Error("Invalid BINOP record"); + I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + InstructionList.push_back(I); + if (OpNum < Record.size()) { + if (Opc == Instruction::Add || + Opc == Instruction::Sub || + Opc == Instruction::Mul || + Opc == Instruction::Shl) { + if (Record[OpNum] & (1 << naclbitc::OBO_NO_SIGNED_WRAP)) + cast<BinaryOperator>(I)->setHasNoSignedWrap(true); + if (Record[OpNum] & (1 << naclbitc::OBO_NO_UNSIGNED_WRAP)) + cast<BinaryOperator>(I)->setHasNoUnsignedWrap(true); + } else if (Opc == Instruction::SDiv || + Opc == Instruction::UDiv || + Opc == Instruction::LShr || + Opc == Instruction::AShr) { + if (Record[OpNum] & (1 << naclbitc::PEO_EXACT)) + cast<BinaryOperator>(I)->setIsExact(true); + } else if (isa<FPMathOperator>(I)) { + FastMathFlags FMF; + if (0 != (Record[OpNum] & (1 << naclbitc::FPO_UNSAFE_ALGEBRA))) + FMF.setUnsafeAlgebra(); + if (0 != (Record[OpNum] & (1 << naclbitc::FPO_NO_NANS))) + FMF.setNoNaNs(); + if (0 != (Record[OpNum] & (1 << naclbitc::FPO_NO_INFS))) + FMF.setNoInfs(); + if (0 != (Record[OpNum] & (1 << naclbitc::FPO_NO_SIGNED_ZEROS))) + FMF.setNoSignedZeros(); + if (0 != (Record[OpNum] & (1 << naclbitc::FPO_ALLOW_RECIPROCAL))) + FMF.setAllowReciprocal(); + if (FMF.any()) + I->setFastMathFlags(FMF); + } + + } + break; + } + case naclbitc::FUNC_CODE_INST_CAST: { // CAST: [opval, destty, castopc] + unsigned OpNum = 0; + Value *Op; + if (popValue(Record, &OpNum, NextValueNo, &Op) || + OpNum+2 != Record.size()) + return Error("Invalid CAST record"); + + Type *ResTy = getTypeByID(Record[OpNum]); + int Opc = GetDecodedCastOpcode(Record[OpNum+1]); + if (Opc == -1 || ResTy == 0) + return Error("Invalid CAST record"); + I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); 
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_INBOUNDS_GEP:
+    case naclbitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands]
+      unsigned OpNum = 0;
+      Value *BasePtr;
+      if (popValue(Record, &OpNum, NextValueNo, &BasePtr))
+        return Error("Invalid GEP record");
+
+      SmallVector<Value*, 16> GEPIdx;
+      while (OpNum != Record.size()) {
+        Value *Op;
+        if (popValue(Record, &OpNum, NextValueNo, &Op))
+          return Error("Invalid GEP record");
+        GEPIdx.push_back(Op);
+      }
+
+      I = GetElementPtrInst::Create(BasePtr, GEPIdx);
+      InstructionList.push_back(I);
+      if (BitCode == naclbitc::FUNC_CODE_INST_INBOUNDS_GEP)
+        cast<GetElementPtrInst>(I)->setIsInBounds(true);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_EXTRACTVAL: {
+      // EXTRACTVAL: [opval, n x indices]
+      unsigned OpNum = 0;
+      Value *Agg;
+      if (popValue(Record, &OpNum, NextValueNo, &Agg))
+        return Error("Invalid EXTRACTVAL record");
+
+      SmallVector<unsigned, 4> EXTRACTVALIdx;
+      for (unsigned RecSize = Record.size();
+           OpNum != RecSize; ++OpNum) {
+        uint64_t Index = Record[OpNum];
+        if ((unsigned)Index != Index)
+          return Error("Invalid EXTRACTVAL index");
+        EXTRACTVALIdx.push_back((unsigned)Index);
+      }
+
+      I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_INSERTVAL: {
+      // INSERTVAL: [opval, opval, n x indices]
+      unsigned OpNum = 0;
+      Value *Agg;
+      if (popValue(Record, &OpNum, NextValueNo, &Agg))
+        return Error("Invalid INSERTVAL record");
+      Value *Val;
+      if (popValue(Record, &OpNum, NextValueNo, &Val))
+        return Error("Invalid INSERTVAL record");
+
+      SmallVector<unsigned, 4> INSERTVALIdx;
+      for (unsigned RecSize = Record.size();
+           OpNum != RecSize; ++OpNum) {
+        uint64_t Index = Record[OpNum];
+        if ((unsigned)Index != Index)
+          return Error("Invalid INSERTVAL index");
+        INSERTVALIdx.push_back((unsigned)Index);
+      }
+
+      I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, opval, opval]
+      // obsolete form of select
+      // handles select i1 ... in old bitcode
+      unsigned OpNum = 0;
+      Value *TrueVal, *FalseVal, *Cond;
+      if (popValue(Record, &OpNum, NextValueNo, &TrueVal) ||
+          popValue(Record, &OpNum, NextValueNo, &FalseVal) ||
+          popValue(Record, &OpNum, NextValueNo, &Cond))
+        return Error("Invalid SELECT record");
+
+      I = SelectInst::Create(Cond, TrueVal, FalseVal);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [opval, opval, pred]
+      // new form of select
+      // handles select i1 or select [N x i1]
+      unsigned OpNum = 0;
+      Value *TrueVal, *FalseVal, *Cond;
+      if (popValue(Record, &OpNum, NextValueNo, &TrueVal) ||
+          popValue(Record, &OpNum, NextValueNo, &FalseVal) ||
+          popValue(Record, &OpNum, NextValueNo, &Cond))
+        return Error("Invalid SELECT record");
+
+      // select condition can be either i1 or [N x i1]
+      if (VectorType* vector_type =
+          dyn_cast<VectorType>(Cond->getType())) {
+        // expect <n x i1>
+        if (vector_type->getElementType() != Type::getInt1Ty(Context))
+          return Error("Invalid SELECT condition type");
+      } else {
+        // expect i1
+        if (Cond->getType() != Type::getInt1Ty(Context))
+          return Error("Invalid SELECT condition type");
+      }
+
+      I = SelectInst::Create(Cond, TrueVal, FalseVal);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opval, opval]
+      unsigned OpNum = 0;
+      Value *Vec, *Idx;
+      if (popValue(Record, &OpNum, NextValueNo, &Vec) ||
+          popValue(Record, &OpNum, NextValueNo, &Idx))
+        return Error("Invalid EXTRACTELT record");
+      I = ExtractElementInst::Create(Vec, Idx);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [opval, opval, opval]
+      unsigned OpNum = 0;
+      Value *Vec, *Elt, *Idx;
+      if (popValue(Record, &OpNum, NextValueNo, &Vec) ||
+          popValue(Record, &OpNum, NextValueNo, &Elt) ||
+          popValue(Record, &OpNum, NextValueNo, &Idx))
+        return Error("Invalid INSERTELT record");
+      I = InsertElementInst::Create(Vec, Elt, Idx);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval, opval, opval]
+      unsigned OpNum = 0;
+      Value *Vec1, *Vec2, *Mask;
+      if (popValue(Record, &OpNum, NextValueNo, &Vec1) ||
+          popValue(Record, &OpNum, NextValueNo, &Vec2))
+        return Error("Invalid SHUFFLEVEC record");
+
+      if (popValue(Record, &OpNum, NextValueNo, &Mask))
+        return Error("Invalid SHUFFLEVEC record");
+      I = new ShuffleVectorInst(Vec1, Vec2, Mask);
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_CMP:   // CMP: [opval, opval, pred]
+      // Old form of ICmp/FCmp returning bool
+      // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were
+      // both legal on vectors but had different behaviour.
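+      // Both forms are decoded identically below; the operand type selects
+      // between FCmpInst and ICmpInst.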
+ case naclbitc::FUNC_CODE_INST_CMP2: { // CMP2: [opval, opval, pred] + // FCmp/ICmp returning bool or vector of bool + + unsigned OpNum = 0; + Value *LHS, *RHS; + if (popValue(Record, &OpNum, NextValueNo, &LHS) || + popValue(Record, &OpNum, NextValueNo, &RHS) || + OpNum+1 != Record.size()) + return Error("Invalid CMP record"); + + if (LHS->getType()->isFPOrFPVectorTy()) + I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + else + I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + InstructionList.push_back(I); + break; + } + + case naclbitc::FUNC_CODE_INST_RET: // RET: [opval<optional>] + { + unsigned Size = Record.size(); + if (Size == 0) { + I = ReturnInst::Create(Context); + InstructionList.push_back(I); + break; + } + + unsigned OpNum = 0; + Value *Op = NULL; + if (popValue(Record, &OpNum, NextValueNo, &Op)) + return Error("Invalid RET record"); + if (OpNum != Record.size()) + return Error("Invalid RET record"); + + I = ReturnInst::Create(Context, Op); + InstructionList.push_back(I); + break; + } + case naclbitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] + if (Record.size() != 1 && Record.size() != 3) + return Error("Invalid BR record"); + BasicBlock *TrueDest = getBasicBlock(Record[0]); + if (TrueDest == 0) + return Error("Invalid BR record"); + + if (Record.size() == 1) { + I = BranchInst::Create(TrueDest); + InstructionList.push_back(I); + } + else { + BasicBlock *FalseDest = getBasicBlock(Record[1]); + Value *Cond = getValue(Record, 2, NextValueNo); + if (FalseDest == 0 || Cond == 0) + return Error("Invalid BR record"); + I = BranchInst::Create(TrueDest, FalseDest, Cond); + InstructionList.push_back(I); + } + break; + } + case naclbitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] + // New SwitchInst format with case ranges. + if (Record.size() < 4) + return Error("Invalid SWITCH record"); + Type *OpTy = getTypeByID(Record[0]); + unsigned ValueBitWidth = cast<IntegerType>(OpTy)->getBitWidth(); + + Value *Cond = getValue(Record, 1, NextValueNo); + BasicBlock *Default = getBasicBlock(Record[2]); + if (OpTy == 0 || Cond == 0 || Default == 0) + return Error("Invalid SWITCH record"); + + unsigned NumCases = Record[3]; + + SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); + InstructionList.push_back(SI); + + unsigned CurIdx = 4; + for (unsigned i = 0; i != NumCases; ++i) { + IntegersSubsetToBB CaseBuilder; + unsigned NumItems = Record[CurIdx++]; + for (unsigned ci = 0; ci != NumItems; ++ci) { + bool isSingleNumber = Record[CurIdx++]; + + APInt Low; + unsigned ActiveWords = 1; + if (ValueBitWidth > 64) + ActiveWords = Record[CurIdx++]; + Low = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), + ValueBitWidth); + CurIdx += ActiveWords; + + if (!isSingleNumber) { + ActiveWords = 1; + if (ValueBitWidth > 64) + ActiveWords = Record[CurIdx++]; + APInt High = + ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), + ValueBitWidth); + + CaseBuilder.add(IntItem::fromType(OpTy, Low), + IntItem::fromType(OpTy, High)); + CurIdx += ActiveWords; + } else + CaseBuilder.add(IntItem::fromType(OpTy, Low)); + } + BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); + IntegersSubset Case = CaseBuilder.getCase(); + SI->addCase(Case, DestBB); + } + I = SI; + break; + } + case naclbitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] 
+ if (Record.size() < 2) + return Error("Invalid INDIRECTBR record"); + Type *OpTy = getTypeByID(Record[0]); + Value *Address = getValue(Record, 1, NextValueNo); + if (OpTy == 0 || Address == 0) + return Error("Invalid INDIRECTBR record"); + unsigned NumDests = Record.size()-2; + IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); + InstructionList.push_back(IBI); + for (unsigned i = 0, e = NumDests; i != e; ++i) { + if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) { + IBI->addDestination(DestBB); + } else { + delete IBI; + return Error("Invalid INDIRECTBR record!"); + } + } + I = IBI; + break; + } + + case naclbitc::FUNC_CODE_INST_INVOKE: + return Error("Invoke is not allowed"); + break; + case naclbitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] + unsigned Idx = 0; + Value *Val = 0; + if (popValue(Record, &Idx, NextValueNo, &Val)) + return Error("Invalid RESUME record"); + I = ResumeInst::Create(Val); + InstructionList.push_back(I); + break; + } + case naclbitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE + I = new UnreachableInst(Context); + InstructionList.push_back(I); + break; + case naclbitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] + if (Record.size() < 1 || ((Record.size()-1)&1)) + return Error("Invalid PHI record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid PHI record"); + + PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); + InstructionList.push_back(PN); + + for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { + Value *V; + // With the new function encoding, it is possible that operands have + // negative IDs (for forward references). Use a signed VBR + // representation to keep the encoding small. + if (UseRelativeIDs) + V = getValueSigned(Record, 1+i, NextValueNo); + else + V = getValue(Record, 1+i, NextValueNo); + BasicBlock *BB = getBasicBlock(Record[2+i]); + if (!V || !BB) return Error("Invalid PHI record"); + PN->addIncoming(V, BB); + } + I = PN; + break; + } + + case naclbitc::FUNC_CODE_INST_LANDINGPAD: { + // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] 
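+      // Each clause is an (id, value) pair: catch clauses carry a single
+      // typeinfo pointer, filter clauses carry an array of typeinfos.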
+      unsigned Idx = 0;
+      if (Record.size() < 4)
+        return Error("Invalid LANDINGPAD record");
+      Type *Ty = getTypeByID(Record[Idx++]);
+      if (!Ty) return Error("Invalid LANDINGPAD record");
+      Value *PersFn = 0;
+      if (popValue(Record, &Idx, NextValueNo, &PersFn))
+        return Error("Invalid LANDINGPAD record");
+
+      bool IsCleanup = !!Record[Idx++];
+      unsigned NumClauses = Record[Idx++];
+      LandingPadInst *LP = LandingPadInst::Create(Ty, PersFn, NumClauses);
+      LP->setCleanup(IsCleanup);
+      for (unsigned J = 0; J != NumClauses; ++J) {
+        LandingPadInst::ClauseType CT =
+          LandingPadInst::ClauseType(Record[Idx++]); (void)CT;
+        Value *Val;
+
+        if (popValue(Record, &Idx, NextValueNo, &Val)) {
+          delete LP;
+          return Error("Invalid LANDINGPAD record");
+        }
+
+        assert((CT != LandingPadInst::Catch ||
+                !isa<ArrayType>(Val->getType())) &&
+               "Catch clause has a invalid type!");
+        assert((CT != LandingPadInst::Filter ||
+                isa<ArrayType>(Val->getType())) &&
+               "Filter clause has invalid type!");
+        LP->addClause(Val);
+      }
+
+      I = LP;
+      InstructionList.push_back(I);
+      break;
+    }
+
+    case naclbitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [op, align]
+      if (Record.size() != 2)
+        return Error("Invalid ALLOCA record");
+      Value *Size;
+      unsigned OpNum = 0;
+      if (popValue(Record, &OpNum, NextValueNo, &Size))
+        return Error("Invalid ALLOCA record");
+      unsigned Align = Record[1];
+      I = new AllocaInst(Type::getInt8Ty(Context), Size, (1 << Align) >> 1);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_LOAD: { // LOAD: [op, align, vol]
+      unsigned OpNum = 0;
+      Value *Op;
+      if (popValue(Record, &OpNum, NextValueNo, &Op) ||
+          OpNum+2 != Record.size())
+        return Error("Invalid LOAD record");
+
+      I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_LOADATOMIC: {
+      // LOADATOMIC: [op, align, vol, ordering, synchscope]
+      unsigned OpNum = 0;
+      Value *Op;
+      if (popValue(Record, &OpNum, NextValueNo, &Op) ||
+          OpNum+4 != Record.size())
+        return Error("Invalid LOADATOMIC record");
+
+      AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+      if (Ordering == NotAtomic || Ordering == Release ||
+          Ordering == AcquireRelease)
+        return Error("Invalid LOADATOMIC record");
+      if (Ordering != NotAtomic && Record[OpNum] == 0)
+        return Error("Invalid LOADATOMIC record");
+      SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+
+      I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1,
+                       Ordering, SynchScope);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_STORE: { // STORE2:[ptr, val, align, vol]
+      unsigned OpNum = 0;
+      Value *Val, *Ptr;
+      if (popValue(Record, &OpNum, NextValueNo, &Ptr) ||
+          popValue(Record, &OpNum, NextValueNo, &Val) ||
+          OpNum+2 != Record.size())
+        return Error("Invalid STORE record");
+
+      I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_STOREATOMIC: {
+      // STOREATOMIC: [ptr, val, align, vol, ordering, synchscope]
+      unsigned OpNum = 0;
+      Value *Val, *Ptr;
+      if (popValue(Record, &OpNum, NextValueNo, &Ptr) ||
+          popValue(Record, &OpNum, NextValueNo, &Val) ||
+          OpNum+4 != Record.size())
+        return Error("Invalid STOREATOMIC record");
+
+      AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+      if (Ordering == NotAtomic || Ordering == Acquire ||
+          Ordering == AcquireRelease)
+        return Error("Invalid STOREATOMIC record");
+      SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+      if (Ordering != NotAtomic && Record[OpNum] == 0)
+        return Error("Invalid STOREATOMIC record");
+
+      I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1,
+                        Ordering, SynchScope);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_CMPXCHG: {
+      // CMPXCHG:[ptr, cmp, new, vol, ordering, synchscope]
+      unsigned OpNum = 0;
+      Value *Ptr, *Cmp, *New;
+      if (popValue(Record, &OpNum, NextValueNo, &Ptr) ||
+          popValue(Record, &OpNum, NextValueNo, &Cmp) ||
+          popValue(Record, &OpNum, NextValueNo, &New) ||
+          OpNum+3 != Record.size())
+        return Error("Invalid CMPXCHG record");
+      AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]);
+      if (Ordering == NotAtomic || Ordering == Unordered)
+        return Error("Invalid CMPXCHG record");
+      SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]);
+      I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope);
+      cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_ATOMICRMW: {
+      // ATOMICRMW:[ptr, val, op, vol, ordering, synchscope]
+      unsigned OpNum = 0;
+      Value *Ptr, *Val;
+      if (popValue(Record, &OpNum, NextValueNo, &Ptr) ||
+          popValue(Record, &OpNum, NextValueNo, &Val) ||
+          OpNum+4 != Record.size())
+        return Error("Invalid ATOMICRMW record");
+      AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]);
+      if (Operation < AtomicRMWInst::FIRST_BINOP ||
+          Operation > AtomicRMWInst::LAST_BINOP)
+        return Error("Invalid ATOMICRMW record");
+      AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]);
+      if (Ordering == NotAtomic || Ordering == Unordered)
+        return Error("Invalid ATOMICRMW record");
+      SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]);
+      I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope);
+      cast<AtomicRMWInst>(I)->setVolatile(Record[OpNum+1]);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope]
+      if (2 != Record.size())
+        return Error("Invalid FENCE record");
+      AtomicOrdering Ordering = GetDecodedOrdering(Record[0]);
+      if (Ordering == NotAtomic || Ordering == Unordered ||
+          Ordering == Monotonic)
+        return Error("Invalid FENCE record");
+      SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]);
+      I = new FenceInst(Context, Ordering, SynchScope);
+      InstructionList.push_back(I);
+      break;
+    }
+    case naclbitc::FUNC_CODE_INST_CALL: {
+      // CALL: [cc, fnid, arg0, arg1...]
+      if (Record.size() < 2)
+        return Error("Invalid CALL record");
+
+      unsigned CCInfo = Record[0];
+
+      unsigned OpNum = 1;
+      Value *Callee;
+      if (popValue(Record, &OpNum, NextValueNo, &Callee))
+        return Error("Invalid CALL record");
+
+      PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
+      FunctionType *FTy = 0;
+      if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
+      if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
+        return Error("Invalid CALL record");
+
+      SmallVector<Value*, 16> Args;
+      // Read the fixed params.
+      for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
+        if (FTy->getParamType(i)->isLabelTy())
+          Args.push_back(getBasicBlock(Record[OpNum]));
+        else
+          Args.push_back(getValue(Record, OpNum, NextValueNo));
+        if (Args.back() == 0) return Error("Invalid CALL record");
+      }
+
+      // Read type/value pairs for varargs params.
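+      // A non-varargs callee must have consumed every operand above; for a
+      // varargs callee the remaining operands become trailing arguments.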
+ if (!FTy->isVarArg()) { + if (OpNum != Record.size()) + return Error("Invalid CALL record"); + } else { + while (OpNum != Record.size()) { + Value *Op; + if (popValue(Record, &OpNum, NextValueNo, &Op)) + return Error("Invalid CALL record"); + Args.push_back(Op); + } + } + + I = CallInst::Create(Callee, Args); + InstructionList.push_back(I); + cast<CallInst>(I)->setCallingConv(GetDecodedCallingConv(CCInfo>>1)); + cast<CallInst>(I)->setTailCall(CCInfo & 1); + break; + } + case naclbitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] + if (Record.size() < 3) + return Error("Invalid VAARG record"); + Type *OpTy = getTypeByID(Record[0]); + Value *Op = getValue(Record, 1, NextValueNo); + Type *ResTy = getTypeByID(Record[2]); + if (!OpTy || !Op || !ResTy) + return Error("Invalid VAARG record"); + I = new VAArgInst(Op, ResTy); + InstructionList.push_back(I); + break; + } + case naclbitc::FUNC_CODE_INST_FORWARDTYPEREF: + // Build corresponding forward reference. + if (Record.size() != 2 || + ValueList.createValueFwdRef(Record[0], getTypeByID(Record[1]))) + return Error("Invalid FORWARDTYPEREF record"); + continue; + } + + // Add instruction to end of current BB. If there is no current BB, reject + // this file. + if (CurBB == 0) { + delete I; + return Error("Invalid instruction with no BB"); + } + CurBB->getInstList().push_back(I); + + // If this was a terminator instruction, move to the next block. + if (isa<TerminatorInst>(I)) { + ++CurBBNo; + CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0; + } + + // Non-void values get registered in the value table for future use. + if (I && !I->getType()->isVoidTy()) + ValueList.AssignValue(I, NextValueNo++); + } + +OutOfRecordLoop: + + // Check the function list for unresolved values. + if (Argument *A = dyn_cast<Argument>(ValueList.back())) { + if (A->getParent() == 0) { + // We found at least one unresolved value. Nuke them all to avoid leaks. + for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ + if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) { + A->replaceAllUsesWith(UndefValue::get(A->getType())); + delete A; + } + } + return Error("Never resolved value found in function!"); + } + } + + // See if anything took the address of blocks in this function. If so, + // resolve them now. + DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI = + BlockAddrFwdRefs.find(F); + if (BAFRI != BlockAddrFwdRefs.end()) { + std::vector<BlockAddrRefTy> &RefList = BAFRI->second; + for (unsigned i = 0, e = RefList.size(); i != e; ++i) { + unsigned BlockIdx = RefList[i].first; + if (BlockIdx >= FunctionBBs.size()) + return Error("Invalid blockaddress block #"); + + GlobalVariable *FwdRef = RefList[i].second; + FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx])); + FwdRef->eraseFromParent(); + } + + BlockAddrFwdRefs.erase(BAFRI); + } + + // Trim the value list down to the size it was before we parsed this function. 
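+  // This drops arguments, function-local constants and instruction results so
+  // they cannot leak into the module-level value table.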
+ ValueList.shrinkTo(ModuleValueListSize); + std::vector<BasicBlock*>().swap(FunctionBBs); + DEBUG(dbgs() << "-> ParseFunctionBody\n"); + return false; +} + +/// FindFunctionInStream - Find the function body in the bitcode stream +bool NaClBitcodeReader::FindFunctionInStream(Function *F, + DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) { + while (DeferredFunctionInfoIterator->second == 0) { + if (Stream.AtEndOfStream()) + return Error("Could not find Function in stream"); + // ParseModule will parse the next body in the stream and set its + // position in the DeferredFunctionInfo map. + if (ParseModule(true)) return true; + } + return false; +} + +//===----------------------------------------------------------------------===// +// GVMaterializer implementation +//===----------------------------------------------------------------------===// + + +bool NaClBitcodeReader::isMaterializable(const GlobalValue *GV) const { + if (const Function *F = dyn_cast<Function>(GV)) { + return F->isDeclaration() && + DeferredFunctionInfo.count(const_cast<Function*>(F)); + } + return false; +} + +bool NaClBitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { + Function *F = dyn_cast<Function>(GV); + // If it's not a function or is already material, ignore the request. + if (!F || !F->isMaterializable()) return false; + + DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F); + assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); + // If its position is recorded as 0, its body is somewhere in the stream + // but we haven't seen it yet. + if (DFII->second == 0) + if (LazyStreamer && FindFunctionInStream(F, DFII)) return true; + + // Move the bit stream to the saved position of the deferred function body. + Stream.JumpToBit(DFII->second); + + if (ParseFunctionBody(F)) { + if (ErrInfo) *ErrInfo = ErrorString; + return true; + } + + // Upgrade any old intrinsic calls in the function. + for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), + E = UpgradedIntrinsics.end(); I != E; ++I) { + if (I->first != I->second) { + for (Value::use_iterator UI = I->first->use_begin(), + UE = I->first->use_end(); UI != UE; ) { + if (CallInst* CI = dyn_cast<CallInst>(*UI++)) + UpgradeIntrinsicCall(CI, I->second); + } + } + } + + return false; +} + +bool NaClBitcodeReader::isDematerializable(const GlobalValue *GV) const { + const Function *F = dyn_cast<Function>(GV); + if (!F || F->isDeclaration()) + return false; + // @LOCALMOD-START + // Don't dematerialize functions with BBs which have their address taken; + // it will cause any referencing blockAddress constants to also be destroyed, + // but because they are GVs, they need to stay around until PassManager + // finalization. + for (Function::const_iterator BB = F->begin(); BB != F->end(); ++BB) { + if (BB->hasAddressTaken()) + return false; + } + // @LOCALMOD-END + return DeferredFunctionInfo.count(const_cast<Function*>(F)); +} + +void NaClBitcodeReader::Dematerialize(GlobalValue *GV) { + Function *F = dyn_cast<Function>(GV); + // If this function isn't dematerializable, this is a noop. + if (!F || !isDematerializable(F)) + return; + + assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); + + // Just forget the function body, we can remat it later. 
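+  // deleteBody() removes the basic blocks but keeps the declaration; the
+  // DeferredFunctionInfo entry still records the body's bit offset, so a later
+  // Materialize() can re-parse it from the stream.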
+ F->deleteBody(); +} + + +bool NaClBitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { + assert(M == TheModule && + "Can only Materialize the Module this NaClBitcodeReader is attached to."); + // Iterate over the module, deserializing any functions that are still on + // disk. + for (Module::iterator F = TheModule->begin(), E = TheModule->end(); + F != E; ++F) + if (F->isMaterializable() && + Materialize(F, ErrInfo)) + return true; + + // At this point, if there are any function bodies, the current bit is + // pointing to the END_BLOCK record after them. Now make sure the rest + // of the bits in the module have been read. + if (NextUnreadBit) + ParseModule(true); + + // Upgrade any intrinsic calls that slipped through (should not happen!) and + // delete the old functions to clean up. We can't do this unless the entire + // module is materialized because there could always be another function body + // with calls to the old function. + for (std::vector<std::pair<Function*, Function*> >::iterator I = + UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) { + if (I->first != I->second) { + for (Value::use_iterator UI = I->first->use_begin(), + UE = I->first->use_end(); UI != UE; ) { + if (CallInst* CI = dyn_cast<CallInst>(*UI++)) + UpgradeIntrinsicCall(CI, I->second); + } + if (!I->first->use_empty()) + I->first->replaceAllUsesWith(I->second); + I->first->eraseFromParent(); + } + } + std::vector<std::pair<Function*, Function*> >().swap(UpgradedIntrinsics); + + return false; +} + +bool NaClBitcodeReader::InitStream() { + if (LazyStreamer) return InitLazyStream(); + return InitStreamFromBuffer(); +} + +bool NaClBitcodeReader::InitStreamFromBuffer() { + const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); + const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + + if (Header.Read(BufPtr, BufEnd)) + return Error("Invalid PNaCl bitcode header"); + + StreamFile.reset(new NaClBitstreamReader(BufPtr, BufEnd)); + Stream.init(*StreamFile); + + return AcceptHeader(); +} + +bool NaClBitcodeReader::InitLazyStream() { + StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer); + if (Header.Read(Bytes)) + return Error("Invalid PNaCl bitcode header"); + + StreamFile.reset(new NaClBitstreamReader(Bytes, Header.getHeaderSize())); + Stream.init(*StreamFile); + return AcceptHeader(); +} + +//===----------------------------------------------------------------------===// +// External interface +//===----------------------------------------------------------------------===// + +/// getNaClLazyBitcodeModule - lazy function-at-a-time loading from a file. +/// +Module *llvm::getNaClLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg, + bool AcceptSupportedOnly) { + Module *M = new Module(Buffer->getBufferIdentifier(), Context); + NaClBitcodeReader *R = + new NaClBitcodeReader(Buffer, Context, AcceptSupportedOnly); + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + + delete M; // Also deletes R. + return 0; + } + // Have the NaClBitcodeReader dtor delete 'Buffer'. 
+ R->setBufferOwned(true); + + R->materializeForwardReferencedFunctions(); + + return M; +} + + +Module *llvm::getNaClStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg, + bool AcceptSupportedOnly) { + Module *M = new Module(name, Context); + NaClBitcodeReader *R = + new NaClBitcodeReader(streamer, Context, AcceptSupportedOnly); + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + delete M; // Also deletes R. + return 0; + } + R->setBufferOwned(false); // no buffer to delete + + R->materializeForwardReferencedFunctions(); + + return M; +} + +/// NaClParseBitcodeFile - Read the specified bitcode file, returning the module. +/// If an error occurs, return null and fill in *ErrMsg if non-null. +Module *llvm::NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, + std::string *ErrMsg, + bool AcceptSupportedOnly){ + Module *M = getNaClLazyBitcodeModule(Buffer, Context, ErrMsg, + AcceptSupportedOnly); + if (!M) return 0; + + // Don't let the NaClBitcodeReader dtor delete 'Buffer', regardless of whether + // there was an error. + static_cast<NaClBitcodeReader*>(M->getMaterializer())->setBufferOwned(false); + + // Read in the entire module, and destroy the NaClBitcodeReader. + if (M->MaterializeAllPermanently(ErrMsg)) { + delete M; + return 0; + } + + // TODO: Restore the use-lists to the in-memory state when the bitcode was + // written. We must defer until the Module has been fully materialized. + + return M; +} diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h new file mode 100644 index 0000000000..454875796a --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h @@ -0,0 +1,297 @@ +//===- NaClBitcodeReader.h ------------------------------------*- C++ -*-===// +// Internal NaClBitcodeReader implementation +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the NaClBitcodeReader class. +// +//===----------------------------------------------------------------------===// + +#ifndef NACL_BITCODE_READER_H +#define NACL_BITCODE_READER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/GVMaterializer.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ValueHandle.h" +#include <vector> + +namespace llvm { + class MemoryBuffer; + class LLVMContext; + +//===----------------------------------------------------------------------===// +// NaClBitcodeReaderValueList Class +//===----------------------------------------------------------------------===// + +class NaClBitcodeReaderValueList { + std::vector<WeakVH> ValuePtrs; + + /// ResolveConstants - As we resolve forward-referenced constants, we add + /// information about them to this vector. This allows us to resolve them in + /// bulk instead of resolving each reference at a time. See the code in + /// ResolveConstantForwardRefs for more information about this. + /// + /// The key of this vector is the placeholder constant, the value is the slot + /// number that holds the resolved value. 
+ typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy; + ResolveConstantsTy ResolveConstants; + LLVMContext &Context; +public: + NaClBitcodeReaderValueList(LLVMContext &C) : Context(C) {} + ~NaClBitcodeReaderValueList() { + assert(ResolveConstants.empty() && "Constants not resolved?"); + } + + // vector compatibility methods + unsigned size() const { return ValuePtrs.size(); } + void resize(unsigned N) { ValuePtrs.resize(N); } + void push_back(Value *V) { + ValuePtrs.push_back(V); + } + + void clear() { + assert(ResolveConstants.empty() && "Constants not resolved?"); + ValuePtrs.clear(); + } + + Value *operator[](unsigned i) const { + assert(i < ValuePtrs.size()); + return ValuePtrs[i]; + } + + Value *back() const { return ValuePtrs.back(); } + void pop_back() { ValuePtrs.pop_back(); } + bool empty() const { return ValuePtrs.empty(); } + void shrinkTo(unsigned N) { + assert(N <= size() && "Invalid shrinkTo request!"); + ValuePtrs.resize(N); + } + + // Declares the type of the forward-referenced value Idx. Returns + // true if an error occurred. It is an error if Idx's type has + // already been declared. + bool createValueFwdRef(unsigned Idx, Type *Ty); + + // Declares the type of the forward-referenced constant Idx. Returns + // 0 if an error occurred. + // TODO(kschimpf) Convert these to be like createValueFwdRef and + // getValueFwdRef. + Constant *getConstantFwdRef(unsigned Idx, Type *Ty); + + // Gets the forward reference value for Idx. + Value *getValueFwdRef(unsigned Idx); + + // Gets the corresponding constant defining the address of the + // corresponding global variable defined by Idx, if already defined. + // Otherwise, creates a forward reference for Idx, and returns the + // placeholder constant for the address of the corresponding global + // variable defined by Idx. + Constant *getOrCreateGlobalVarRef(unsigned Idx, Module* M); + + // Assigns Idx to the given value (if new), or assigns V to Idx (if Idx + // was forward referenced). + void AssignValue(Value *V, unsigned Idx); + + // Assigns Idx to the given global variable. If the Idx currently has + // a forward reference (built by createGlobalVarFwdRef(unsigned Idx)), + // replaces uses of the global variable forward reference with the + // value GV. + void AssignGlobalVar(GlobalVariable *GV, unsigned Idx); + + /// ResolveConstantForwardRefs - Once all constants are read, this method bulk + /// resolves any forward references. + void ResolveConstantForwardRefs(); +}; + + +class NaClBitcodeReader : public GVMaterializer { + NaClBitcodeHeader Header; // Header fields of the PNaCl bitcode file. + LLVMContext &Context; + Module *TheModule; + MemoryBuffer *Buffer; + bool BufferOwned; + OwningPtr<NaClBitstreamReader> StreamFile; + NaClBitstreamCursor Stream; + DataStreamer *LazyStreamer; + uint64_t NextUnreadBit; + bool SeenValueSymbolTable; + + const char *ErrorString; + + std::vector<Type*> TypeList; + NaClBitcodeReaderValueList ValueList; + SmallVector<Instruction *, 64> InstructionList; + SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords; + + std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits; + + /// FunctionBBs - While parsing a function body, this is a list of the basic + /// blocks for the function. + std::vector<BasicBlock*> FunctionBBs; + + // When reading the module header, this list is populated with functions that + // have bodies later in the file. 
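+  // Their bodies are skipped on the initial scan and located again later
+  // (see DeferredFunctionInfo below), so Materialize() only has to parse a
+  // body when it is actually requested.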
+ std::vector<Function*> FunctionsWithBodies; + + // When intrinsic functions are encountered which require upgrading they are + // stored here with their replacement function. + typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap; + UpgradedIntrinsicMap UpgradedIntrinsics; + + // Several operations happen after the module header has been read, but + // before function bodies are processed. This keeps track of whether + // we've done this yet. + bool SeenFirstFunctionBody; + + /// DeferredFunctionInfo - When function bodies are initially scanned, this + /// map contains info about where to find deferred function body in the + /// stream. + DenseMap<Function*, uint64_t> DeferredFunctionInfo; + + /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These + /// are resolved lazily when functions are loaded. + typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy; + DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs; + + /// UseRelativeIDs - Indicates that we are using a new encoding for + /// instruction operands where most operands in the current + /// FUNCTION_BLOCK are encoded relative to the instruction number, + /// for a more compact encoding. Some instruction operands are not + /// relative to the instruction ID: basic block numbers, and types. + /// Once the old style function blocks have been phased out, we would + /// not need this flag. + bool UseRelativeIDs; + + /// \brief True if we should only accept supported bitcode format. + bool AcceptSupportedBitcodeOnly; + +public: + explicit NaClBitcodeReader(MemoryBuffer *buffer, LLVMContext &C, + bool AcceptSupportedOnly = true) + : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), + LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false), + ErrorString(0), ValueList(C), + SeenFirstFunctionBody(false), UseRelativeIDs(false), + AcceptSupportedBitcodeOnly(AcceptSupportedOnly) { + } + explicit NaClBitcodeReader(DataStreamer *streamer, LLVMContext &C, + bool AcceptSupportedOnly = true) + : Context(C), TheModule(0), Buffer(0), BufferOwned(false), + LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), + ErrorString(0), ValueList(C), + SeenFirstFunctionBody(false), UseRelativeIDs(false), + AcceptSupportedBitcodeOnly(AcceptSupportedOnly) { + } + ~NaClBitcodeReader() { + FreeState(); + } + + void materializeForwardReferencedFunctions(); + + void FreeState(); + + /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer + /// when the reader is destroyed. + void setBufferOwned(bool Owned) { BufferOwned = Owned; } + + virtual bool isMaterializable(const GlobalValue *GV) const; + virtual bool isDematerializable(const GlobalValue *GV) const; + virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0); + virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0); + virtual void Dematerialize(GlobalValue *GV); + + bool Error(const char *Str) { + ErrorString = Str; + return true; + } + const char *getErrorString() const { return ErrorString; } + + /// @brief Main interface to parsing a bitcode buffer. + /// @returns true if an error occurred. + bool ParseBitcodeInto(Module *M); + +private: + // Returns false if Header is acceptable. + bool AcceptHeader() const { + return !(Header.IsSupported() || + (!AcceptSupportedBitcodeOnly && Header.IsReadable())); + } + Type *getTypeByID(unsigned ID); + // Returns the value associated with ID. 
The value must already exist, + // or a forward referenced value created by getOrCreateFnVaueByID. + Value *getFnValueByID(unsigned ID) { + return ValueList.getValueFwdRef(ID); + } + BasicBlock *getBasicBlock(unsigned ID) const { + if (ID >= FunctionBBs.size()) return 0; // Invalid ID + return FunctionBBs[ID]; + } + + /// \brief Read a value out of the specified record from slot '*Slot'. + /// Increment *Slot past the number of slots used by the value in the record. + /// Return true if there is an error. + bool popValue(const SmallVector<uint64_t, 64> &Record, unsigned *Slot, + unsigned InstNum, Value **ResVal) { + if (*Slot == Record.size()) return true; + unsigned ValNo = (unsigned)Record[(*Slot)++]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + *ResVal = getFnValueByID(ValNo); + return *ResVal == 0; + } + + /// getValue -- Version of getValue that returns ResVal directly, + /// or 0 if there is an error. + Value *getValue(const SmallVector<uint64_t, 64> &Record, unsigned Slot, + unsigned InstNum) { + if (Slot == Record.size()) return 0; + unsigned ValNo = (unsigned)Record[Slot]; + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo); + } + + /// getValueSigned -- Like getValue, but decodes signed VBRs. + Value *getValueSigned(const SmallVector<uint64_t, 64> &Record, unsigned Slot, + unsigned InstNum) { + if (Slot == Record.size()) return 0; + unsigned ValNo = (unsigned) NaClDecodeSignRotatedValue(Record[Slot]); + // Adjust the ValNo, if it was encoded relative to the InstNum. + if (UseRelativeIDs) + ValNo = InstNum - ValNo; + return getFnValueByID(ValNo); + } + + bool ParseModule(bool Resume); + bool ParseTypeTable(); + bool ParseTypeTableBody(); + bool ParseGlobalVars(); + bool ParseValueSymbolTable(); + bool ParseConstants(); + bool RememberAndSkipFunctionBody(); + bool ParseFunctionBody(Function *F); + bool GlobalCleanup(); + bool ResolveAliasInits(); + bool ParseUseLists(); + bool InitStream(); + bool InitStreamFromBuffer(); + bool InitLazyStream(); + bool FindFunctionInStream(Function *F, + DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Bitcode/NaCl/Reader/NaClBitstreamReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitstreamReader.cpp new file mode 100644 index 0000000000..d75c42ca9b --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitstreamReader.cpp @@ -0,0 +1,374 @@ +//===- NaClBitstreamReader.cpp --------------------------------------------===// +// NaClBitstreamReader implementation +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// NaClBitstreamCursor implementation +//===----------------------------------------------------------------------===// + +void NaClBitstreamCursor::operator=(const NaClBitstreamCursor &RHS) { + freeState(); + + BitStream = RHS.BitStream; + NextChar = RHS.NextChar; + CurWord = RHS.CurWord; + BitsInCurWord = RHS.BitsInCurWord; + CurCodeSize = RHS.CurCodeSize; + + // Copy abbreviations, and bump ref counts. 
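+  // The NaClBitCodeAbbrev objects themselves are shared between the two
+  // cursors; addRef()/dropRef() keep each one alive until the last cursor
+  // referencing it calls freeState().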
+ CurAbbrevs = RHS.CurAbbrevs; + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->addRef(); + + // Copy block scope and bump ref counts. + BlockScope = RHS.BlockScope; + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector<NaClBitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs; + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->addRef(); + } +} + +void NaClBitstreamCursor::freeState() { + // Free all the Abbrevs. + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->dropRef(); + CurAbbrevs.clear(); + + // Free all the Abbrevs in the block scope. + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector<NaClBitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs; + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->dropRef(); + } + BlockScope.clear(); +} + +/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter +/// the block, and return true if the block has an error. +bool NaClBitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { + // Save the current block's state on BlockScope. + BlockScope.push_back(Block(CurCodeSize)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // Add the abbrevs specific to this block to the CurAbbrevs list. + if (const NaClBitstreamReader::BlockInfo *Info = + BitStream->getBlockInfo(BlockID)) { + for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + CurAbbrevs.back()->addRef(); + } + } + + // Get the codesize of this block. + CurCodeSize.IsFixed = true; + CurCodeSize.NumBits = ReadVBR(naclbitc::CodeLenWidth); + SkipToFourByteBoundary(); + unsigned NumWords = Read(naclbitc::BlockSizeWidth); + if (NumWordsP) *NumWordsP = NumWords; + + // Validate that this block is sane. + if (CurCodeSize.NumBits == 0 || AtEndOfStream()) + return true; + + return false; +} + +void NaClBitstreamCursor::readAbbreviatedLiteral( + const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl<uint64_t> &Vals) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, use it. + Vals.push_back(Op.getLiteralValue()); +} + +void NaClBitstreamCursor::readAbbreviatedField( + const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl<uint64_t> &Vals) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + case NaClBitCodeAbbrevOp::Array: + case NaClBitCodeAbbrevOp::Blob: + assert(0 && "Should not reach here"); + case NaClBitCodeAbbrevOp::Fixed: + Vals.push_back(Read((unsigned)Op.getEncodingData())); + break; + case NaClBitCodeAbbrevOp::VBR: + Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); + break; + case NaClBitCodeAbbrevOp::Char6: + Vals.push_back(NaClBitCodeAbbrevOp::DecodeChar6(Read(6))); + break; + } +} + +void NaClBitstreamCursor::skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + case NaClBitCodeAbbrevOp::Array: + case NaClBitCodeAbbrevOp::Blob: + assert(0 && "Should not reach here"); + case NaClBitCodeAbbrevOp::Fixed: + (void)Read((unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::VBR: + (void)ReadVBR64((unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::Char6: + (void)Read(6); + break; + } +} + + + +/// skipRecord - Read the current record and discard it. 
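+/// An unabbreviated record is laid out as [code:vbr6, numelts:vbr6,
+/// elt0:vbr6, ..., elt(n-1):vbr6]; an abbreviated record is skipped by
+/// walking the operand list of its abbreviation, as below.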
+void NaClBitstreamCursor::skipRecord(unsigned AbbrevID) { + // Skip unabbreviated records by reading past their entries. + if (AbbrevID == naclbitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + (void)Code; + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + (void)ReadVBR64(6); + return; + } + + const NaClBitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) + continue; + + if (Op.getEncoding() != NaClBitCodeAbbrevOp::Array && + Op.getEncoding() != NaClBitCodeAbbrevOp::Blob) { + skipAbbreviatedField(Op); + continue; + } + + if (Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const NaClBitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + skipAbbreviatedField(EltEnc); + continue; + } + + assert(Op.getEncoding() == NaClBitCodeAbbrevOp::Blob); + // Blob case. Read the number of bytes as a vbr6. + unsigned NumElts = ReadVBR(6); + SkipToFourByteBoundary(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. + size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8; + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (!canSkipToPos(NewEnd/8)) { + NextChar = BitStream->getBitcodeBytes().getExtent(); + break; + } + + // Skip over the blob. + JumpToBit(NewEnd); + } +} + +unsigned NaClBitstreamCursor::readRecord(unsigned AbbrevID, + SmallVectorImpl<uint64_t> &Vals, + StringRef *Blob) { + if (AbbrevID == naclbitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + Vals.push_back(ReadVBR64(6)); + return Code; + } + + const NaClBitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + readAbbreviatedLiteral(Op, Vals); + continue; + } + + if (Op.getEncoding() != NaClBitCodeAbbrevOp::Array && + Op.getEncoding() != NaClBitCodeAbbrevOp::Blob) { + readAbbreviatedField(Op, Vals); + continue; + } + + if (Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const NaClBitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + readAbbreviatedField(EltEnc, Vals); + continue; + } + + assert(Op.getEncoding() == NaClBitCodeAbbrevOp::Blob); + // Blob case. Read the number of bytes as a vbr6. + unsigned NumElts = ReadVBR(6); + SkipToFourByteBoundary(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. + size_t CurBitPos = GetCurrentBitNo(); + size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8; + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (!canSkipToPos(NewEnd/8)) { + Vals.append(NumElts, 0); + NextChar = BitStream->getBitcodeBytes().getExtent(); + break; + } + + // Otherwise, inform the streamer that we need these bytes in memory. 
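+    // getPointer() makes sure the NumElts blob bytes starting at byte
+    // CurBitPos/8 are available before we reference them.  NewEnd above
+    // rounds the blob up to a whole number of 32-bit words, e.g. a 5-byte
+    // blob occupies 8 bytes in the stream, and the trailing
+    // JumpToBit(NewEnd) skips the 3 bytes of tail padding.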
+ const char *Ptr = (const char*) + BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts); + + // If we can return a reference to the data, do so to avoid copying it. + if (Blob) { + *Blob = StringRef(Ptr, NumElts); + } else { + // Otherwise, unpack into Vals with zero extension. + for (; NumElts; --NumElts) + Vals.push_back((unsigned char)*Ptr++); + } + // Skip over tail padding. + JumpToBit(NewEnd); + } + + unsigned Code = (unsigned)Vals[0]; + Vals.erase(Vals.begin()); + return Code; +} + + +void NaClBitstreamCursor::ReadAbbrevRecord() { + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + unsigned NumOpInfo = ReadVBR(5); + for (unsigned i = 0; i != NumOpInfo; ++i) { + bool IsLiteral = Read(1) ? true : false; + if (IsLiteral) { + Abbv->Add(NaClBitCodeAbbrevOp(ReadVBR64(8))); + continue; + } + + NaClBitCodeAbbrevOp::Encoding E = (NaClBitCodeAbbrevOp::Encoding)Read(3); + if (NaClBitCodeAbbrevOp::hasEncodingData(E)) { + unsigned Data = ReadVBR64(5); + + // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) + // and vbr(0) as a literal zero. This is decoded the same way, and avoids + // a slow path in Read() to have to handle reading zero bits. + if ((E == NaClBitCodeAbbrevOp::Fixed || E == NaClBitCodeAbbrevOp::VBR) && + Data == 0) { + Abbv->Add(NaClBitCodeAbbrevOp(0)); + continue; + } + + Abbv->Add(NaClBitCodeAbbrevOp(E, Data)); + } else + Abbv->Add(NaClBitCodeAbbrevOp(E)); + } + CurAbbrevs.push_back(Abbv); +} + +bool NaClBitstreamCursor::ReadBlockInfoBlock() { + // If this is the second stream to get to the block info block, skip it. + if (BitStream->hasBlockInfoRecords()) + return SkipBlock(); + + if (EnterSubBlock(naclbitc::BLOCKINFO_BLOCK_ID)) return true; + + SmallVector<uint64_t, 64> Record; + NaClBitstreamReader::BlockInfo *CurBlockInfo = 0; + + // Read all the records for this module. + while (1) { + NaClBitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case llvm::NaClBitstreamEntry::SubBlock: // Handled for us already. + case llvm::NaClBitstreamEntry::Error: + return true; + case llvm::NaClBitstreamEntry::EndBlock: + return false; + case llvm::NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read abbrev records, associate them with CurBID. + if (Entry.ID == naclbitc::DEFINE_ABBREV) { + if (!CurBlockInfo) return true; + ReadAbbrevRecord(); + + // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the + // appropriate BlockInfo. + NaClBitCodeAbbrev *Abbv = CurAbbrevs.back(); + CurAbbrevs.pop_back(); + CurBlockInfo->Abbrevs.push_back(Abbv); + continue; + } + + // Read a record. + Record.clear(); + switch (readRecord(Entry.ID, Record)) { + default: break; // Default behavior, ignore unknown content. + case naclbitc::BLOCKINFO_CODE_SETBID: + if (Record.size() < 1) return true; + CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]); + break; + case naclbitc::BLOCKINFO_CODE_BLOCKNAME: { + if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. + std::string Name; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + Name += (char)Record[i]; + CurBlockInfo->Name = Name; + break; + } + case naclbitc::BLOCKINFO_CODE_SETRECORDNAME: { + if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. 
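+      // Record layout is [record-code, namechar x N]: element 0 identifies
+      // the record being named and the remaining elements spell the name,
+      // hence the loop below starts at index 1.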
+ std::string Name; + for (unsigned i = 1, e = Record.size(); i != e; ++i) + Name += (char)Record[i]; + CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0], + Name)); + break; + } + } + } +} diff --git a/lib/Bitcode/NaCl/Writer/CMakeLists.txt b/lib/Bitcode/NaCl/Writer/CMakeLists.txt new file mode 100644 index 0000000000..f5718fdb88 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMNaClBitWriter + NaClBitcodeWriter.cpp + NaClValueEnumerator.cpp + ) +add_dependencies(LLVMNaClBitWriter intinsics_gen) diff --git a/lib/Bitcode/NaCl/Writer/LLVMBuild.txt b/lib/Bitcode/NaCl/Writer/LLVMBuild.txt new file mode 100644 index 0000000000..b41d469100 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Bitcode/NaCl/Writer/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClBitWriter +parent = NaClBitcode +required_libraries = Core Support diff --git a/lib/Bitcode/NaCl/Writer/Makefile b/lib/Bitcode/NaCl/Writer/Makefile new file mode 100644 index 0000000000..60da2d1b71 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/Makefile @@ -0,0 +1,15 @@ +##===- lib/Bitcode/NaCl/Writer/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMNaClBitWriter +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp new file mode 100644 index 0000000000..33d0d84cb5 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp @@ -0,0 +1,1790 @@ +//===--- Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp - Bitcode Writer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Bitcode writer implementation. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "NaClBitcodeWriter" + +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "NaClValueEnumerator.h" +#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include <cctype> +#include <map> +using namespace llvm; + +/// These are manifest constants used by the bitcode writer. They do +/// not need to be kept in sync with the reader, but need to be +/// consistent within this file. +/// +/// Note that for each block type GROUP, the last entry should be of +/// the form: +/// +/// GROUP_MAX_ABBREV = GROUP_LAST_ABBREV, +/// +/// where GROUP_LAST_ABBREV is the last defined abbreviation. See +/// include file "llvm/Bitcode/NaCl/NaClBitCodes.h" for more +/// information on how groups should be defined. +enum { + // VALUE_SYMTAB_BLOCK abbrev id's. + VST_ENTRY_8_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + VST_ENTRY_7_ABBREV, + VST_ENTRY_6_ABBREV, + VST_BBENTRY_6_ABBREV, + VST_MAX_ABBREV = VST_BBENTRY_6_ABBREV, + + // CONSTANTS_BLOCK abbrev id's. + CONSTANTS_SETTYPE_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + CONSTANTS_INTEGER_ABBREV, + CONSTANTS_CE_CAST_Abbrev, + CONSTANTS_NULL_Abbrev, + CONSTANTS_MAX_ABBREV = CONSTANTS_NULL_Abbrev, + + // CONSTANTS_BLOCK abbrev id's when global (extends list above). + CST_CONSTANTS_AGGREGATE_ABBREV = CONSTANTS_MAX_ABBREV+1, + CST_CONSTANTS_STRING_ABBREV, + CST_CONSTANTS_CSTRING_7_ABBREV, + CST_CONSTANTS_CSTRING_6_ABBREV, + CST_CONSTANTS_MAX_ABBREV = CST_CONSTANTS_CSTRING_6_ABBREV, + + // GLOBALVAR BLOCK abbrev id's. + GLOBALVAR_VAR_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + GLOBALVAR_COMPOUND_ABBREV, + GLOBALVAR_ZEROFILL_ABBREV, + GLOBALVAR_DATA_ABBREV, + GLOBALVAR_RELOC_ABBREV, + GLOBALVAR_RELOC_WITH_ADDEND_ABBREV, + GLOBALVAR_MAX_ABBREV = GLOBALVAR_RELOC_WITH_ADDEND_ABBREV, + + // FUNCTION_BLOCK abbrev id's. + FUNCTION_INST_LOAD_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + FUNCTION_INST_BINOP_ABBREV, + FUNCTION_INST_BINOP_FLAGS_ABBREV, + FUNCTION_INST_CAST_ABBREV, + FUNCTION_INST_RET_VOID_ABBREV, + FUNCTION_INST_RET_VAL_ABBREV, + FUNCTION_INST_UNREACHABLE_ABBREV, + FUNCTION_INST_FORWARDTYPEREF_ABBREV, + FUNCTION_INST_MAX_ABBREV = FUNCTION_INST_FORWARDTYPEREF_ABBREV, + + // TYPE_BLOCK_ID_NEW abbrev id's. 
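+  // As with the groups above, ids restart at FIRST_APPLICATION_ABBREV
+  // because the lower ids are reserved for the builtin bitstream codes
+  // (END_BLOCK, ENTER_SUBBLOCK, DEFINE_ABBREV and UNABBREV_RECORD).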
+ TYPE_POINTER_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + TYPE_FUNCTION_ABBREV, + TYPE_STRUCT_ANON_ABBREV, + TYPE_STRUCT_NAME_ABBREV, + TYPE_STRUCT_NAMED_ABBREV, + TYPE_ARRAY_ABBREV, + TYPE_MAX_ABBREV = TYPE_ARRAY_ABBREV, + + // SwitchInst Magic + SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex +}; + +static unsigned GetEncodedCastOpcode(unsigned Opcode) { + switch (Opcode) { + default: report_fatal_error("Unknown cast instruction!"); + case Instruction::Trunc : return naclbitc::CAST_TRUNC; + case Instruction::ZExt : return naclbitc::CAST_ZEXT; + case Instruction::SExt : return naclbitc::CAST_SEXT; + case Instruction::FPToUI : return naclbitc::CAST_FPTOUI; + case Instruction::FPToSI : return naclbitc::CAST_FPTOSI; + case Instruction::UIToFP : return naclbitc::CAST_UITOFP; + case Instruction::SIToFP : return naclbitc::CAST_SITOFP; + case Instruction::FPTrunc : return naclbitc::CAST_FPTRUNC; + case Instruction::FPExt : return naclbitc::CAST_FPEXT; + case Instruction::PtrToInt: return naclbitc::CAST_PTRTOINT; + case Instruction::IntToPtr: return naclbitc::CAST_INTTOPTR; + case Instruction::BitCast : return naclbitc::CAST_BITCAST; + } +} + +static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { + switch (Opcode) { + default: report_fatal_error("Unknown binary instruction!"); + case Instruction::Add: + case Instruction::FAdd: return naclbitc::BINOP_ADD; + case Instruction::Sub: + case Instruction::FSub: return naclbitc::BINOP_SUB; + case Instruction::Mul: + case Instruction::FMul: return naclbitc::BINOP_MUL; + case Instruction::UDiv: return naclbitc::BINOP_UDIV; + case Instruction::FDiv: + case Instruction::SDiv: return naclbitc::BINOP_SDIV; + case Instruction::URem: return naclbitc::BINOP_UREM; + case Instruction::FRem: + case Instruction::SRem: return naclbitc::BINOP_SREM; + case Instruction::Shl: return naclbitc::BINOP_SHL; + case Instruction::LShr: return naclbitc::BINOP_LSHR; + case Instruction::AShr: return naclbitc::BINOP_ASHR; + case Instruction::And: return naclbitc::BINOP_AND; + case Instruction::Or: return naclbitc::BINOP_OR; + case Instruction::Xor: return naclbitc::BINOP_XOR; + } +} + +static unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) { + switch (Op) { + default: report_fatal_error("Unknown RMW operation!"); + case AtomicRMWInst::Xchg: return naclbitc::RMW_XCHG; + case AtomicRMWInst::Add: return naclbitc::RMW_ADD; + case AtomicRMWInst::Sub: return naclbitc::RMW_SUB; + case AtomicRMWInst::And: return naclbitc::RMW_AND; + case AtomicRMWInst::Nand: return naclbitc::RMW_NAND; + case AtomicRMWInst::Or: return naclbitc::RMW_OR; + case AtomicRMWInst::Xor: return naclbitc::RMW_XOR; + case AtomicRMWInst::Max: return naclbitc::RMW_MAX; + case AtomicRMWInst::Min: return naclbitc::RMW_MIN; + case AtomicRMWInst::UMax: return naclbitc::RMW_UMAX; + case AtomicRMWInst::UMin: return naclbitc::RMW_UMIN; + } +} + +static unsigned GetEncodedOrdering(AtomicOrdering Ordering) { + switch (Ordering) { + default: report_fatal_error("Invalid ordering"); + case NotAtomic: return naclbitc::ORDERING_NOTATOMIC; + case Unordered: return naclbitc::ORDERING_UNORDERED; + case Monotonic: return naclbitc::ORDERING_MONOTONIC; + case Acquire: return naclbitc::ORDERING_ACQUIRE; + case Release: return naclbitc::ORDERING_RELEASE; + case AcquireRelease: return naclbitc::ORDERING_ACQREL; + case SequentiallyConsistent: return naclbitc::ORDERING_SEQCST; + } +} + +static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) { + switch (SynchScope) { + default: report_fatal_error("Invalid 
synch scope"); + case SingleThread: return naclbitc::SYNCHSCOPE_SINGLETHREAD; + case CrossThread: return naclbitc::SYNCHSCOPE_CROSSTHREAD; + } +} + +static unsigned GetEncodedCallingConv(CallingConv::ID conv) { + switch (conv) { + default: report_fatal_error( + "Calling convention not supported by PNaCL bitcode"); + case CallingConv::C: return naclbitc::C_CallingConv; + } +} + +static void WriteStringRecord(unsigned Code, StringRef Str, + unsigned AbbrevToUse, + NaClBitstreamWriter &Stream) { + SmallVector<unsigned, 64> Vals; + + // Code: [strchar x N] + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + if (AbbrevToUse && !NaClBitCodeAbbrevOp::isChar6(Str[i])) + AbbrevToUse = 0; + Vals.push_back(Str[i]); + } + + // Emit the finished record. + Stream.EmitRecord(Code, Vals, AbbrevToUse); +} + +/// WriteTypeTable - Write out the type table for a module. +static void WriteTypeTable(const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + DEBUG(dbgs() << "-> WriteTypeTable\n"); + const NaClValueEnumerator::TypeList &TypeList = VE.getTypes(); + + Stream.EnterSubblock(naclbitc::TYPE_BLOCK_ID_NEW, TYPE_MAX_ABBREV); + + SmallVector<uint64_t, 64> TypeVals; + + + // Note: modify to use maximum number of bits if under cutoff. Otherwise, + // use VBR to take advantage that frequently referenced types have + // small IDs. + // + // Note: Cutoff chosen based on experiments on pnacl-translate.pexe. + uint64_t NumBits = NaClBitsNeededForValue(VE.getTypes().size()); + static const uint64_t TypeVBRCutoff = 6; + uint64_t TypeIdNumBits = (NumBits <= TypeVBRCutoff ? NumBits : TypeVBRCutoff); + NaClBitCodeAbbrevOp::Encoding TypeIdEncoding = + (NumBits <= TypeVBRCutoff + ? NaClBitCodeAbbrevOp::Fixed : NaClBitCodeAbbrevOp::VBR); + + // Abbrev for TYPE_CODE_POINTER. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_POINTER)); + Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits)); + Abbv->Add(NaClBitCodeAbbrevOp(0)); // Addrspace = 0 + if (TYPE_POINTER_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for TYPE_CODE_FUNCTION. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // isvararg + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits)); + if (TYPE_FUNCTION_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for TYPE_CODE_STRUCT_ANON. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_ANON)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // ispacked + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits)); + if (TYPE_STRUCT_ANON_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for TYPE_CODE_STRUCT_NAME. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_NAME)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6)); + if (TYPE_STRUCT_NAME_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for TYPE_CODE_STRUCT_NAMED. 
+ Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_STRUCT_NAMED)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // ispacked + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits)); + if (TYPE_STRUCT_NAMED_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for TYPE_CODE_ARRAY. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_ARRAY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); // size + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, NumBits)); + if (TYPE_ARRAY_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Emit an entry count so the reader can reserve space. + TypeVals.push_back(TypeList.size()); + Stream.EmitRecord(naclbitc::TYPE_CODE_NUMENTRY, TypeVals); + TypeVals.clear(); + + // Loop over all of the types, emitting each in turn. + for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { + Type *T = TypeList[i]; + int AbbrevToUse = 0; + unsigned Code = 0; + + switch (T->getTypeID()) { + default: llvm_unreachable("Unknown type!"); + case Type::VoidTyID: Code = naclbitc::TYPE_CODE_VOID; break; + case Type::HalfTyID: Code = naclbitc::TYPE_CODE_HALF; break; + case Type::FloatTyID: Code = naclbitc::TYPE_CODE_FLOAT; break; + case Type::DoubleTyID: Code = naclbitc::TYPE_CODE_DOUBLE; break; + case Type::X86_FP80TyID: Code = naclbitc::TYPE_CODE_X86_FP80; break; + case Type::FP128TyID: Code = naclbitc::TYPE_CODE_FP128; break; + case Type::PPC_FP128TyID: Code = naclbitc::TYPE_CODE_PPC_FP128; break; + case Type::LabelTyID: Code = naclbitc::TYPE_CODE_LABEL; break; + case Type::X86_MMXTyID: Code = naclbitc::TYPE_CODE_X86_MMX; break; + case Type::IntegerTyID: + // INTEGER: [width] + Code = naclbitc::TYPE_CODE_INTEGER; + TypeVals.push_back(cast<IntegerType>(T)->getBitWidth()); + break; + case Type::PointerTyID: { + PointerType *PTy = cast<PointerType>(T); + // POINTER: [pointee type, address space] + Code = naclbitc::TYPE_CODE_POINTER; + TypeVals.push_back(VE.getTypeID(PTy->getElementType())); + unsigned AddressSpace = PTy->getAddressSpace(); + TypeVals.push_back(AddressSpace); + if (AddressSpace == 0) AbbrevToUse = TYPE_POINTER_ABBREV; + break; + } + case Type::FunctionTyID: { + FunctionType *FT = cast<FunctionType>(T); + // FUNCTION: [isvararg, retty, paramty x N] + Code = naclbitc::TYPE_CODE_FUNCTION; + TypeVals.push_back(FT->isVarArg()); + TypeVals.push_back(VE.getTypeID(FT->getReturnType())); + for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) + TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); + AbbrevToUse = TYPE_FUNCTION_ABBREV; + break; + } + case Type::StructTyID: { + StructType *ST = cast<StructType>(T); + // STRUCT: [ispacked, eltty x N] + TypeVals.push_back(ST->isPacked()); + // Output all of the element types. + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); I != E; ++I) + TypeVals.push_back(VE.getTypeID(*I)); + + if (ST->isLiteral()) { + Code = naclbitc::TYPE_CODE_STRUCT_ANON; + AbbrevToUse = TYPE_STRUCT_ANON_ABBREV; + } else { + if (ST->isOpaque()) { + Code = naclbitc::TYPE_CODE_OPAQUE; + } else { + Code = naclbitc::TYPE_CODE_STRUCT_NAMED; + AbbrevToUse = TYPE_STRUCT_NAMED_ABBREV; + } + + // Emit the name if it is present. 
+ if (!ST->getName().empty()) + WriteStringRecord(naclbitc::TYPE_CODE_STRUCT_NAME, ST->getName(), + TYPE_STRUCT_NAME_ABBREV, Stream); + } + break; + } + case Type::ArrayTyID: { + ArrayType *AT = cast<ArrayType>(T); + // ARRAY: [numelts, eltty] + Code = naclbitc::TYPE_CODE_ARRAY; + TypeVals.push_back(AT->getNumElements()); + TypeVals.push_back(VE.getTypeID(AT->getElementType())); + AbbrevToUse = TYPE_ARRAY_ABBREV; + break; + } + case Type::VectorTyID: { + VectorType *VT = cast<VectorType>(T); + // VECTOR [numelts, eltty] + Code = naclbitc::TYPE_CODE_VECTOR; + TypeVals.push_back(VT->getNumElements()); + TypeVals.push_back(VE.getTypeID(VT->getElementType())); + break; + } + } + + // Emit the finished record. + Stream.EmitRecord(Code, TypeVals, AbbrevToUse); + TypeVals.clear(); + } + + Stream.ExitBlock(); + DEBUG(dbgs() << "<- WriteTypeTable\n"); +} + +static unsigned getEncodedLinkage(const GlobalValue *GV) { + switch (GV->getLinkage()) { + case GlobalValue::ExternalLinkage: return 0; + case GlobalValue::WeakAnyLinkage: return 1; + case GlobalValue::AppendingLinkage: return 2; + case GlobalValue::InternalLinkage: return 3; + case GlobalValue::LinkOnceAnyLinkage: return 4; + case GlobalValue::DLLImportLinkage: return 5; + case GlobalValue::DLLExportLinkage: return 6; + case GlobalValue::ExternalWeakLinkage: return 7; + case GlobalValue::CommonLinkage: return 8; + case GlobalValue::PrivateLinkage: return 9; + case GlobalValue::WeakODRLinkage: return 10; + case GlobalValue::LinkOnceODRLinkage: return 11; + case GlobalValue::AvailableExternallyLinkage: return 12; + case GlobalValue::LinkerPrivateLinkage: return 13; + case GlobalValue::LinkerPrivateWeakLinkage: return 14; + case GlobalValue::LinkOnceODRAutoHideLinkage: return 15; + } + llvm_unreachable("Invalid linkage"); +} + +static unsigned getEncodedVisibility(const GlobalValue *GV) { + switch (GV->getVisibility()) { + case GlobalValue::DefaultVisibility: return 0; + case GlobalValue::HiddenVisibility: return 1; + case GlobalValue::ProtectedVisibility: return 2; + } + llvm_unreachable("Invalid visibility"); +} + +/// \brief Function to convert constant initializers for global +/// variables into corresponding bitcode. Takes advantage that these +/// global variable initializations are normalized (see +/// lib/Transforms/NaCl/FlattenGlobals.cpp). +void WriteGlobalInit(const Constant *C, unsigned GlobalVarID, + SmallVectorImpl<uint32_t> &Vals, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + if (ArrayType *Ty = dyn_cast<ArrayType>(C->getType())) { + if (!Ty->getElementType()->isIntegerTy(8)) + report_fatal_error("Global array initializer not i8"); + uint32_t Size = Ty->getNumElements(); + if (isa<ConstantAggregateZero>(C)) { + Vals.push_back(Size); + Stream.EmitRecord(naclbitc::GLOBALVAR_ZEROFILL, Vals, + GLOBALVAR_ZEROFILL_ABBREV); + Vals.clear(); + } else { + const ConstantDataSequential *CD = cast<ConstantDataSequential>(C); + StringRef Data = CD->getRawDataValues(); + for (size_t i = 0; i < Size; ++i) { + Vals.push_back(Data[i] & 0xFF); + } + Stream.EmitRecord(naclbitc::GLOBALVAR_DATA, Vals, + GLOBALVAR_DATA_ABBREV); + Vals.clear(); + } + return; + } + if (C->getType()->isIntegerTy(32)) { + // This constant defines a relocation. Start by verifying the + // relocation is of the right form. 
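+  // The only accepted forms (after the FlattenGlobals normalization) are:
+  //   i32 ptrtoint (<ty>* @target to i32)
+  //   i32 add (i32 ptrtoint (<ty>* @target to i32), i32 <addend>)
+  // where @target stands for any global value.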
+ const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (CE == 0) + report_fatal_error("Global i32 initializer not constant"); + assert(CE); + int32_t Addend = 0; + if (CE->getOpcode() == Instruction::Add) { + const ConstantInt *AddendConst = dyn_cast<ConstantInt>(CE->getOperand(1)); + if (AddendConst == 0) + report_fatal_error("Malformed addend in global relocation initializer"); + Addend = AddendConst->getSExtValue(); + CE = dyn_cast<ConstantExpr>(CE->getOperand(0)); + if (CE == 0) + report_fatal_error( + "Base of global relocation initializer not constant"); + } + if (CE->getOpcode() != Instruction::PtrToInt) + report_fatal_error("Global relocation base doesn't contain ptrtoint"); + GlobalValue *GV = dyn_cast<GlobalValue>(CE->getOperand(0)); + if (GV == 0) + report_fatal_error( + "Argument of ptrtoint in global relocation no global value"); + + // Now generate the corresponding relocation record. + unsigned RelocID = VE.getValueID(GV); + // This is a value index. + unsigned AbbrevToUse = GLOBALVAR_RELOC_ABBREV; + Vals.push_back(RelocID); + if (Addend) { + Vals.push_back(Addend); + AbbrevToUse = GLOBALVAR_RELOC_WITH_ADDEND_ABBREV; + } + Stream.EmitRecord(naclbitc::GLOBALVAR_RELOC, Vals, AbbrevToUse); + Vals.clear(); + return; + } + report_fatal_error("Global initializer is not a SimpleElement"); +} + +// Emit global variables. +static void WriteGlobalVars(const Module *M, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + Stream.EnterSubblock(naclbitc::GLOBALVAR_BLOCK_ID); + SmallVector<uint32_t, 32> Vals; + unsigned GlobalVarID = VE.getFirstGlobalVarID(); + + // Emit the number of global variables. + + Vals.push_back(M->getGlobalList().size()); + Stream.EmitRecord(naclbitc::GLOBALVAR_COUNT, Vals); + Vals.clear(); + + // Now emit each global variable. + for (Module::const_global_iterator + GV = M->global_begin(), E = M->global_end(); + GV != E; ++GV, ++GlobalVarID) { + // Define the global variable. + Vals.push_back(Log2_32(GV->getAlignment()) + 1); + Vals.push_back(GV->isConstant()); + Stream.EmitRecord(naclbitc::GLOBALVAR_VAR, Vals, GLOBALVAR_VAR_ABBREV); + Vals.clear(); + + // Add the field(s). + const Constant *C = GV->getInitializer(); + if (C == 0) + report_fatal_error("Global variable initializer not a constant"); + if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) { + if (!CS->getType()->isPacked()) + report_fatal_error("Global variable type not packed"); + if (CS->getType()->hasName()) + report_fatal_error("Global variable type is named"); + Vals.push_back(CS->getNumOperands()); + Stream.EmitRecord(naclbitc::GLOBALVAR_COMPOUND, Vals, + GLOBALVAR_COMPOUND_ABBREV); + Vals.clear(); + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + WriteGlobalInit(dyn_cast<Constant>(CS->getOperand(I)), GlobalVarID, + Vals, VE, Stream); + } + } else { + WriteGlobalInit(C, GlobalVarID, Vals, VE, Stream); + } + } + + assert(GlobalVarID == VE.getFirstGlobalVarID() + VE.getNumGlobalVarIDs()); + Stream.ExitBlock(); +} + +// Emit top-level description of module, including inline asm, +// descriptors for global variables, and function prototype info. +static void WriteModuleInfo(const Module *M, const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + DEBUG(dbgs() << "-> WriteModuleInfo\n"); + // Emit various pieces of data attached to a module. + if (!M->getModuleInlineAsm().empty()) + WriteStringRecord(naclbitc::MODULE_CODE_ASM, M->getModuleInlineAsm(), + 0/*TODO*/, Stream); + + // Emit information about sections and GC, computing how many there are. 
Also + // compute the maximum alignment value. + // TODO(kschimpf): Remove code for SectionMap and GCMap. + std::map<std::string, unsigned> SectionMap; + std::map<std::string, unsigned> GCMap; + for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end(); + GV != E; ++GV) { + if (GV->hasSection()) { + // Give section names unique ID's. + unsigned &Entry = SectionMap[GV->getSection()]; + if (!Entry) { + WriteStringRecord(naclbitc::MODULE_CODE_SECTIONNAME, GV->getSection(), + 0/*TODO*/, Stream); + Entry = SectionMap.size(); + } + } + } + for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + if (F->hasSection()) { + // Give section names unique ID's. + unsigned &Entry = SectionMap[F->getSection()]; + if (!Entry) { + WriteStringRecord(naclbitc::MODULE_CODE_SECTIONNAME, F->getSection(), + 0/*TODO*/, Stream); + Entry = SectionMap.size(); + } + } + if (F->hasGC()) { + // Same for GC names. + unsigned &Entry = GCMap[F->getGC()]; + if (!Entry) { + WriteStringRecord(naclbitc::MODULE_CODE_GCNAME, F->getGC(), + 0/*TODO*/, Stream); + Entry = GCMap.size(); + } + } + } + + // Emit the function proto information. Note: We do this before + // global variables, so that global variable initializations can + // refer to the functions without a forward reference. + SmallVector<unsigned, 64> Vals; + for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + // FUNCTION: [type, callingconv, isproto, linkage] + Vals.push_back(VE.getTypeID(F->getType())); + Vals.push_back(GetEncodedCallingConv(F->getCallingConv())); + Vals.push_back(F->isDeclaration()); + Vals.push_back(getEncodedLinkage(F)); + + unsigned AbbrevToUse = 0; + Stream.EmitRecord(naclbitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse); + Vals.clear(); + } + + // Emit the global variable information. + WriteGlobalVars(M, VE, Stream); + + // Emit the alias information. 
+ for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end(); + AI != E; ++AI) { + // ALIAS: [alias type, aliasee val#, linkage, visibility] + Vals.push_back(VE.getTypeID(AI->getType())); + Vals.push_back(VE.getValueID(AI->getAliasee())); + Vals.push_back(getEncodedLinkage(AI)); + Vals.push_back(getEncodedVisibility(AI)); + unsigned AbbrevToUse = 0; + Stream.EmitRecord(naclbitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); + Vals.clear(); + } + DEBUG(dbgs() << "<- WriteModuleInfo\n"); +} + +static uint64_t GetOptimizationFlags(const Value *V) { + uint64_t Flags = 0; + + if (const OverflowingBinaryOperator *OBO = + dyn_cast<OverflowingBinaryOperator>(V)) { + if (OBO->hasNoSignedWrap()) + Flags |= 1 << naclbitc::OBO_NO_SIGNED_WRAP; + if (OBO->hasNoUnsignedWrap()) + Flags |= 1 << naclbitc::OBO_NO_UNSIGNED_WRAP; + } else if (const PossiblyExactOperator *PEO = + dyn_cast<PossiblyExactOperator>(V)) { + if (PEO->isExact()) + Flags |= 1 << naclbitc::PEO_EXACT; + } else if (const FPMathOperator *FPMO = + dyn_cast<const FPMathOperator>(V)) { + if (FPMO->hasUnsafeAlgebra()) + Flags |= 1 << naclbitc::FPO_UNSAFE_ALGEBRA; + if (FPMO->hasNoNaNs()) + Flags |= 1 << naclbitc::FPO_NO_NANS; + if (FPMO->hasNoInfs()) + Flags |= 1 << naclbitc::FPO_NO_INFS; + if (FPMO->hasNoSignedZeros()) + Flags |= 1 << naclbitc::FPO_NO_SIGNED_ZEROS; + if (FPMO->hasAllowReciprocal()) + Flags |= 1 << naclbitc::FPO_ALLOW_RECIPROCAL; + } + + return Flags; +} + +static void emitSignedInt64(SmallVectorImpl<uint64_t> &Vals, uint64_t V) { + Vals.push_back(NaClEncodeSignRotatedValue((int64_t)V)); +} + +static void EmitAPInt(SmallVectorImpl<uint64_t> &Vals, + unsigned &Code, unsigned &AbbrevToUse, const APInt &Val, + bool EmitSizeForWideNumbers = false + ) { + if (Val.getBitWidth() <= 64) { + uint64_t V = Val.getSExtValue(); + emitSignedInt64(Vals, V); + Code = naclbitc::CST_CODE_INTEGER; + AbbrevToUse = CONSTANTS_INTEGER_ABBREV; + } else { + // Wide integers, > 64 bits in size. + // We have an arbitrary precision integer value to write whose + // bit width is > 64. However, in canonical unsigned integer + // format it is likely that the high bits are going to be zero. + // So, we only write the number of active words. + unsigned NWords = Val.getActiveWords(); + + if (EmitSizeForWideNumbers) + Vals.push_back(NWords); + + const uint64_t *RawWords = Val.getRawData(); + for (unsigned i = 0; i != NWords; ++i) { + emitSignedInt64(Vals, RawWords[i]); + } + Code = naclbitc::CST_CODE_WIDE_INTEGER; + } +} + +static void WriteConstants(unsigned FirstVal, unsigned LastVal, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream, bool isGlobal) { + if (FirstVal == LastVal) return; + + Stream.EnterSubblock(naclbitc::CONSTANTS_BLOCK_ID, + (isGlobal + ? CST_CONSTANTS_MAX_ABBREV + : CONSTANTS_MAX_ABBREV)); + + unsigned AggregateAbbrev = 0; + unsigned String8Abbrev = 0; + unsigned CString7Abbrev = 0; + unsigned CString6Abbrev = 0; + // If this is a constant pool for the module, emit module-specific abbrevs. + // Note: These abbreviations are size specific (to LastVal), and hence, + // can be more efficient if LastVal is known (rather then generating + // up-front for all constant sections). + if (isGlobal) { + // Abbrev for CST_CODE_AGGREGATE. 
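+    // Element value ids are emitted with a fixed width just wide enough for
+    // the largest id used by this constant pool, e.g. LastVal = 1000 would
+    // give Fixed(10).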
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_AGGREGATE)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, + NaClBitsNeededForValue(LastVal))); + AggregateAbbrev = Stream.EmitAbbrev(Abbv); + if (CST_CONSTANTS_AGGREGATE_ABBREV != AggregateAbbrev) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for CST_CODE_STRING. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_STRING)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 8)); + String8Abbrev = Stream.EmitAbbrev(Abbv); + if (CST_CONSTANTS_STRING_ABBREV != String8Abbrev) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for CST_CODE_CSTRING. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_CSTRING)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 7)); + CString7Abbrev = Stream.EmitAbbrev(Abbv); + if (CST_CONSTANTS_CSTRING_7_ABBREV != CString7Abbrev) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for CST_CODE_CSTRING. + Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_CSTRING)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6)); + CString6Abbrev = Stream.EmitAbbrev(Abbv); + if (CST_CONSTANTS_CSTRING_6_ABBREV != CString6Abbrev) + llvm_unreachable("Unexpected abbrev ordering!"); + + DEBUG(dbgs() << "-- emitted abbreviations\n"); + } + + + SmallVector<uint64_t, 64> Record; + + const NaClValueEnumerator::ValueList &Vals = VE.getValues(); + Type *LastTy = 0; + for (unsigned i = FirstVal; i != LastVal; ++i) { + const Value *V = Vals[i].first; + // If we need to switch types, do so now. + if (V->getType() != LastTy) { + LastTy = V->getType(); + Record.push_back(VE.getTypeID(LastTy)); + Stream.EmitRecord(naclbitc::CST_CODE_SETTYPE, Record, + CONSTANTS_SETTYPE_ABBREV); + Record.clear(); + } + + if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) { + Record.push_back(unsigned(IA->hasSideEffects()) | + unsigned(IA->isAlignStack()) << 1 | + unsigned(IA->getDialect()&1) << 2); + + // Add the asm string. + const std::string &AsmStr = IA->getAsmString(); + Record.push_back(AsmStr.size()); + for (unsigned i = 0, e = AsmStr.size(); i != e; ++i) + Record.push_back(AsmStr[i]); + + // Add the constraint string. 
+ const std::string &ConstraintStr = IA->getConstraintString(); + Record.push_back(ConstraintStr.size()); + for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i) + Record.push_back(ConstraintStr[i]); + Stream.EmitRecord(naclbitc::CST_CODE_INLINEASM, Record); + Record.clear(); + continue; + } + const Constant *C = cast<Constant>(V); + unsigned Code = -1U; + unsigned AbbrevToUse = 0; + if (C->isNullValue()) { + Code = naclbitc::CST_CODE_NULL; + } else if (isa<UndefValue>(C)) { + Code = naclbitc::CST_CODE_UNDEF; + } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) { + EmitAPInt(Record, Code, AbbrevToUse, IV->getValue()); + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) { + Code = naclbitc::CST_CODE_FLOAT; + Type *Ty = CFP->getType(); + if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) { + Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + } else if (Ty->isX86_FP80Ty()) { + // api needed to prevent premature destruction + // bits are not in the same order as a normal i80 APInt, compensate. + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + Record.push_back((p[1] << 48) | (p[0] >> 16)); + Record.push_back(p[0] & 0xffffLL); + } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) { + APInt api = CFP->getValueAPF().bitcastToAPInt(); + const uint64_t *p = api.getRawData(); + Record.push_back(p[0]); + Record.push_back(p[1]); + } else { + assert (0 && "Unknown FP type!"); + } + } else if (isa<ConstantDataSequential>(C) && + cast<ConstantDataSequential>(C)->isString()) { + const ConstantDataSequential *Str = cast<ConstantDataSequential>(C); + // Emit constant strings specially. + unsigned NumElts = Str->getNumElements(); + // If this is a null-terminated string, use the denser CSTRING encoding. + if (Str->isCString()) { + Code = naclbitc::CST_CODE_CSTRING; + --NumElts; // Don't encode the null, which isn't allowed by char6. 
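+        // e.g. the null-terminated string "abc\0" is emitted as
+        // CST_CODE_CSTRING holding just "abc"; the 6- and 7-bit abbrevs
+        // below are chosen when every remaining character fits the
+        // corresponding alphabet.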
+ } else { + Code = naclbitc::CST_CODE_STRING; + AbbrevToUse = String8Abbrev; + } + bool isCStr7 = Code == naclbitc::CST_CODE_CSTRING; + bool isCStrChar6 = Code == naclbitc::CST_CODE_CSTRING; + for (unsigned i = 0; i != NumElts; ++i) { + unsigned char V = Str->getElementAsInteger(i); + Record.push_back(V); + isCStr7 &= (V & 128) == 0; + if (isCStrChar6) + isCStrChar6 = NaClBitCodeAbbrevOp::isChar6(V); + } + + if (isCStrChar6) + AbbrevToUse = CString6Abbrev; + else if (isCStr7) + AbbrevToUse = CString7Abbrev; + } else if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + Code = naclbitc::CST_CODE_DATA; + Type *EltTy = CDS->getType()->getElementType(); + if (isa<IntegerType>(EltTy)) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) + Record.push_back(CDS->getElementAsInteger(i)); + } else if (EltTy->isFloatTy()) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { float F; uint32_t I; }; + F = CDS->getElementAsFloat(i); + Record.push_back(I); + } + } else { + assert(EltTy->isDoubleTy() && "Unknown ConstantData element type"); + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + union { double F; uint64_t I; }; + F = CDS->getElementAsDouble(i); + Record.push_back(I); + } + } + } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) || + isa<ConstantVector>(C)) { + Code = naclbitc::CST_CODE_AGGREGATE; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + Record.push_back(VE.getValueID(C->getOperand(i))); + AbbrevToUse = AggregateAbbrev; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + switch (CE->getOpcode()) { + default: + if (Instruction::isCast(CE->getOpcode())) { + Code = naclbitc::CST_CODE_CE_CAST; + Record.push_back(GetEncodedCastOpcode(CE->getOpcode())); + Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); + Record.push_back(VE.getValueID(C->getOperand(0))); + AbbrevToUse = CONSTANTS_CE_CAST_Abbrev; + } else { + assert(CE->getNumOperands() == 2 && "Unknown constant expr!"); + Code = naclbitc::CST_CODE_CE_BINOP; + Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode())); + Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getValueID(C->getOperand(1))); + uint64_t Flags = GetOptimizationFlags(CE); + if (Flags != 0) + Record.push_back(Flags); + } + break; + case Instruction::GetElementPtr: + Code = naclbitc::CST_CODE_CE_GEP; + if (cast<GEPOperator>(C)->isInBounds()) + Code = naclbitc::CST_CODE_CE_INBOUNDS_GEP; + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { + Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); + Record.push_back(VE.getValueID(C->getOperand(i))); + } + break; + case Instruction::Select: + Code = naclbitc::CST_CODE_CE_SELECT; + Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getValueID(C->getOperand(1))); + Record.push_back(VE.getValueID(C->getOperand(2))); + break; + case Instruction::ExtractElement: + Code = naclbitc::CST_CODE_CE_EXTRACTELT; + Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); + Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getValueID(C->getOperand(1))); + break; + case Instruction::InsertElement: + Code = naclbitc::CST_CODE_CE_INSERTELT; + Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getValueID(C->getOperand(1))); + Record.push_back(VE.getValueID(C->getOperand(2))); + break; + case Instruction::ShuffleVector: + // If the return type and argument types are the same, this is a + // standard 
shufflevector instruction. If the types are different, + // then the shuffle is widening or truncating the input vectors, and + // the argument type must also be encoded. + if (C->getType() == C->getOperand(0)->getType()) { + Code = naclbitc::CST_CODE_CE_SHUFFLEVEC; + } else { + Code = naclbitc::CST_CODE_CE_SHUFVEC_EX; + Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); + } + Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getValueID(C->getOperand(1))); + Record.push_back(VE.getValueID(C->getOperand(2))); + break; + case Instruction::ICmp: + case Instruction::FCmp: + Code = naclbitc::CST_CODE_CE_CMP; + Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); + Record.push_back(VE.getValueID(C->getOperand(0))); + Record.push_back(VE.getValueID(C->getOperand(1))); + Record.push_back(CE->getPredicate()); + break; + } + } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) { + Code = naclbitc::CST_CODE_BLOCKADDRESS; + Record.push_back(VE.getTypeID(BA->getFunction()->getType())); + Record.push_back(VE.getValueID(BA->getFunction())); + Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock())); + } else { +#ifndef NDEBUG + C->dump(); +#endif + llvm_unreachable("Unknown constant!"); + } + Stream.EmitRecord(Code, Record, AbbrevToUse); + Record.clear(); + } + + Stream.ExitBlock(); + DEBUG(dbgs() << "<- WriteConstants\n"); +} + +static void WriteModuleConstants(const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + const NaClValueEnumerator::ValueList &Vals = VE.getValues(); + + // Find the first constant to emit, which is the first non-globalvalue value. + // We know globalvalues have been emitted by WriteModuleInfo. + for (unsigned i = 0, e = Vals.size(); i != e; ++i) { + if (!isa<GlobalValue>(Vals[i].first)) { + WriteConstants(i, Vals.size(), VE, Stream, true); + return; + } + } +} + +/// \brief Emits a type for the forward value reference. That is, if +/// the ID for the given value is larger than or equal to the BaseID, +/// the corresponding forward reference is generated. +static void EmitFnForwardTypeRef(const Value *V, + unsigned BaseID, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + unsigned ValID = VE.getValueID(V); + if (ValID >= BaseID && + VE.InsertFnForwardTypeRef(ValID)) { + SmallVector<unsigned, 2> Vals; + Vals.push_back(ValID); + Vals.push_back(VE.getTypeID(V->getType())); + Stream.EmitRecord(naclbitc::FUNC_CODE_INST_FORWARDTYPEREF, Vals, + FUNCTION_INST_FORWARDTYPEREF_ABBREV); + } +} + +/// pushValue - The file has to encode both the value and type id for +/// many values, because we need to know what type to create for forward +/// references. However, most operands are not forward references, so this type +/// field is not needed. +/// +/// This function adds V's value ID to Vals. If the value ID is higher than the +/// instruction ID, then it is a forward reference, and it also includes the +/// type ID. The value ID that is written is encoded relative to the InstID. +static void pushValue(const Value *V, unsigned InstID, + SmallVector<unsigned, 64> &Vals, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + EmitFnForwardTypeRef(V, InstID, VE, Stream); + unsigned ValID = VE.getValueID(V); + // Make encoding relative to the InstID. 
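pushValue above stores each operand as a distance relative to the current instruction ID (the push that follows), so recently defined values get small numbers; pushValueSigned does the same but keeps the sign for forward references such as PHI incoming values. A rough illustration; the helper names are hypothetical, and the sign-and-magnitude packing is an assumption about what emitSignedInt64 does:

    #include <cstdint>
    // Backward reference: instruction 100 using value 97 stores 3.
    uint64_t relativeID(unsigned InstID, unsigned ValID) {
      return InstID - ValID;  // unsigned: a forward reference would wrap around
    }
    // Signed variant used for PHIs: value 103 referenced from instruction 100
    // gives -3, packed so the sign ends up in the low bit of a VBR payload.
    uint64_t signedRelativeID(unsigned InstID, unsigned ValID) {
      int64_t Diff = (int64_t)InstID - (int64_t)ValID;
      return Diff >= 0 ? (uint64_t(Diff) << 1) : ((uint64_t(-Diff) << 1) | 1);
    }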
+ Vals.push_back(InstID - ValID); +} + +static void pushValue64(const Value *V, unsigned InstID, + SmallVector<uint64_t, 128> &Vals, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + EmitFnForwardTypeRef(V, InstID, VE, Stream); + uint64_t ValID = VE.getValueID(V); + Vals.push_back(InstID - ValID); +} + +static void pushValueSigned(const Value *V, unsigned InstID, + SmallVector<uint64_t, 128> &Vals, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + EmitFnForwardTypeRef(V, InstID, VE, Stream); + unsigned ValID = VE.getValueID(V); + int64_t diff = ((int32_t)InstID - (int32_t)ValID); + emitSignedInt64(Vals, diff); +} + +/// WriteInstruction - Emit an instruction to the specified stream. +static void WriteInstruction(const Instruction &I, unsigned InstID, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream, + SmallVector<unsigned, 64> &Vals) { + unsigned Code = 0; + unsigned AbbrevToUse = 0; + VE.setInstructionID(&I); + switch (I.getOpcode()) { + default: + if (Instruction::isCast(I.getOpcode())) { + // CAST: [opval, destty, castopc] + Code = naclbitc::FUNC_CODE_INST_CAST; + AbbrevToUse = FUNCTION_INST_CAST_ABBREV; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + Vals.push_back(VE.getTypeID(I.getType())); + Vals.push_back(GetEncodedCastOpcode(I.getOpcode())); + } else { + // BINOP: [opval, opval, opcode[, flags]] + assert(isa<BinaryOperator>(I) && "Unknown instruction!"); + Code = naclbitc::FUNC_CODE_INST_BINOP; + AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode())); + uint64_t Flags = GetOptimizationFlags(&I); + if (Flags != 0) { + AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV; + Vals.push_back(Flags); + } + } + break; + + case Instruction::GetElementPtr: + Code = naclbitc::FUNC_CODE_INST_GEP; + if (cast<GEPOperator>(&I)->isInBounds()) + Code = naclbitc::FUNC_CODE_INST_INBOUNDS_GEP; + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) + pushValue(I.getOperand(i), InstID, Vals, VE, Stream); + break; + case Instruction::ExtractValue: { + Code = naclbitc::FUNC_CODE_INST_EXTRACTVAL; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + const ExtractValueInst *EVI = cast<ExtractValueInst>(&I); + for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i) + Vals.push_back(*i); + break; + } + case Instruction::InsertValue: { + Code = naclbitc::FUNC_CODE_INST_INSERTVAL; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + const InsertValueInst *IVI = cast<InsertValueInst>(&I); + for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i) + Vals.push_back(*i); + break; + } + case Instruction::Select: + Code = naclbitc::FUNC_CODE_INST_VSELECT; + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + pushValue(I.getOperand(2), InstID, Vals, VE, Stream); + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + break; + case Instruction::ExtractElement: + Code = naclbitc::FUNC_CODE_INST_EXTRACTELT; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + break; + case Instruction::InsertElement: + Code = naclbitc::FUNC_CODE_INST_INSERTELT; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + pushValue(I.getOperand(2), InstID, Vals, VE, Stream); + break; + case Instruction::ShuffleVector: + 
Code = naclbitc::FUNC_CODE_INST_SHUFFLEVEC; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + pushValue(I.getOperand(2), InstID, Vals, VE, Stream); + break; + case Instruction::ICmp: + case Instruction::FCmp: + // compare returning Int1Ty or vector of Int1Ty + Code = naclbitc::FUNC_CODE_INST_CMP2; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + Vals.push_back(cast<CmpInst>(I).getPredicate()); + break; + + case Instruction::Ret: + { + Code = naclbitc::FUNC_CODE_INST_RET; + unsigned NumOperands = I.getNumOperands(); + if (NumOperands == 0) + AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV; + else if (NumOperands == 1) { + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV; + } else { + for (unsigned i = 0, e = NumOperands; i != e; ++i) + pushValue(I.getOperand(i), InstID, Vals, VE, Stream); + } + } + break; + case Instruction::Br: + { + Code = naclbitc::FUNC_CODE_INST_BR; + const BranchInst &II = cast<BranchInst>(I); + Vals.push_back(VE.getValueID(II.getSuccessor(0))); + if (II.isConditional()) { + Vals.push_back(VE.getValueID(II.getSuccessor(1))); + pushValue(II.getCondition(), InstID, Vals, VE, Stream); + } + } + break; + case Instruction::Switch: + { + // Redefine Vals, since here we need to use 64 bit values + // explicitly to store large APInt numbers. + SmallVector<uint64_t, 128> Vals64; + + Code = naclbitc::FUNC_CODE_INST_SWITCH; + const SwitchInst &SI = cast<SwitchInst>(I); + + Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); + pushValue64(SI.getCondition(), InstID, Vals64, VE, Stream); + Vals64.push_back(VE.getValueID(SI.getDefaultDest())); + Vals64.push_back(SI.getNumCases()); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const IntegersSubset& CaseRanges = i.getCaseValueEx(); + unsigned Code, Abbrev; // will unused. + + if (CaseRanges.isSingleNumber()) { + Vals64.push_back(1/*NumItems = 1*/); + Vals64.push_back(true/*IsSingleNumber = true*/); + EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true); + } else { + + Vals64.push_back(CaseRanges.getNumItems()); + + if (CaseRanges.isSingleNumbersOnly()) { + for (unsigned ri = 0, rn = CaseRanges.getNumItems(); + ri != rn; ++ri) { + + Vals64.push_back(true/*IsSingleNumber = true*/); + + EmitAPInt(Vals64, Code, Abbrev, + CaseRanges.getSingleNumber(ri), true); + } + } else + for (unsigned ri = 0, rn = CaseRanges.getNumItems(); + ri != rn; ++ri) { + IntegersSubset::Range r = CaseRanges.getItem(ri); + bool IsSingleNumber = CaseRanges.isSingleNumber(ri); + + Vals64.push_back(IsSingleNumber); + + EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true); + if (!IsSingleNumber) + EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true); + } + } + Vals64.push_back(VE.getValueID(i.getCaseSuccessor())); + } + + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + + // Also do expected action - clear external Vals collection: + Vals.clear(); + return; + } + break; + case Instruction::IndirectBr: + Code = naclbitc::FUNC_CODE_INST_INDIRECTBR; + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); + // Encode the address operand as relative, but not the basic blocks. 
+ pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) + Vals.push_back(VE.getValueID(I.getOperand(i))); + break; + + case Instruction::Invoke: + report_fatal_error("Invoke is not allowed in PNaCl bitcode"); + break; + case Instruction::Resume: + Code = naclbitc::FUNC_CODE_INST_RESUME; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + break; + case Instruction::Unreachable: + Code = naclbitc::FUNC_CODE_INST_UNREACHABLE; + AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; + break; + + case Instruction::PHI: { + const PHINode &PN = cast<PHINode>(I); + Code = naclbitc::FUNC_CODE_INST_PHI; + // With the newer instruction encoding, forward references could give + // negative valued IDs. This is most common for PHIs, so we use + // signed VBRs. + SmallVector<uint64_t, 128> Vals64; + Vals64.push_back(VE.getTypeID(PN.getType())); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE, Stream); + Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); + } + // Emit a Vals64 vector and exit. + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + Vals64.clear(); + return; + } + + case Instruction::LandingPad: { + const LandingPadInst &LP = cast<LandingPadInst>(I); + Code = naclbitc::FUNC_CODE_INST_LANDINGPAD; + Vals.push_back(VE.getTypeID(LP.getType())); + pushValue(LP.getPersonalityFn(), InstID, Vals, VE, Stream); + Vals.push_back(LP.isCleanup()); + Vals.push_back(LP.getNumClauses()); + for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) { + if (LP.isCatch(I)) + Vals.push_back(LandingPadInst::Catch); + else + Vals.push_back(LandingPadInst::Filter); + pushValue(LP.getClause(I), InstID, Vals, VE, Stream); + } + break; + } + + case Instruction::Alloca: + if (!cast<AllocaInst>(&I)->getAllocatedType()->isIntegerTy(8)) + report_fatal_error("Type of alloca instruction is not i8"); + Code = naclbitc::FUNC_CODE_INST_ALLOCA; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // size. + Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1); + break; + + case Instruction::Load: + if (cast<LoadInst>(I).isAtomic()) { + Code = naclbitc::FUNC_CODE_INST_LOADATOMIC; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + } else { + Code = naclbitc::FUNC_CODE_INST_LOAD; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // ptr + AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; + } + Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1); + Vals.push_back(cast<LoadInst>(I).isVolatile()); + if (cast<LoadInst>(I).isAtomic()) { + Vals.push_back(GetEncodedOrdering(cast<LoadInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope(cast<LoadInst>(I).getSynchScope())); + } + break; + case Instruction::Store: + if (cast<StoreInst>(I).isAtomic()) + Code = naclbitc::FUNC_CODE_INST_STOREATOMIC; + else + Code = naclbitc::FUNC_CODE_INST_STORE; + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); // ptrty + ptr + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // val. 
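The alignment operands in the alloca/load records above (and in the store record continuing below) are not raw byte counts: they are written as log2(alignment) + 1, with 0 left to mean "no alignment specified". A small standalone sketch of that encoding, assuming power-of-two alignments:

    #include <cstdint>
    // Encode an alignment the way these records do: 1 -> 1, 4 -> 3, 16 -> 5,
    // and 0 (unspecified) stays 0, so a reader can recover it as 1 << (V - 1).
    uint64_t encodeAlignment(uint64_t AlignInBytes) {
      if (AlignInBytes == 0)
        return 0;
      uint64_t Log2 = 0;
      while ((uint64_t(1) << Log2) < AlignInBytes)
        ++Log2;
      return Log2 + 1;
    }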
+ Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1); + Vals.push_back(cast<StoreInst>(I).isVolatile()); + if (cast<StoreInst>(I).isAtomic()) { + Vals.push_back(GetEncodedOrdering(cast<StoreInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope(cast<StoreInst>(I).getSynchScope())); + } + break; + case Instruction::AtomicCmpXchg: + Code = naclbitc::FUNC_CODE_INST_CMPXCHG; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // ptrty + ptr + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); // cmp. + pushValue(I.getOperand(2), InstID, Vals, VE, Stream); // newval. + Vals.push_back(cast<AtomicCmpXchgInst>(I).isVolatile()); + Vals.push_back(GetEncodedOrdering( + cast<AtomicCmpXchgInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope( + cast<AtomicCmpXchgInst>(I).getSynchScope())); + break; + case Instruction::AtomicRMW: + Code = naclbitc::FUNC_CODE_INST_ATOMICRMW; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // ptrty + ptr + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); // val. + Vals.push_back(GetEncodedRMWOperation( + cast<AtomicRMWInst>(I).getOperation())); + Vals.push_back(cast<AtomicRMWInst>(I).isVolatile()); + Vals.push_back(GetEncodedOrdering(cast<AtomicRMWInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope( + cast<AtomicRMWInst>(I).getSynchScope())); + break; + case Instruction::Fence: + Code = naclbitc::FUNC_CODE_INST_FENCE; + Vals.push_back(GetEncodedOrdering(cast<FenceInst>(I).getOrdering())); + Vals.push_back(GetEncodedSynchScope(cast<FenceInst>(I).getSynchScope())); + break; + case Instruction::Call: { + const CallInst &CI = cast<CallInst>(I); + PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PTy->getElementType()); + + Code = naclbitc::FUNC_CODE_INST_CALL; + + Vals.push_back((GetEncodedCallingConv(CI.getCallingConv()) << 1) + | unsigned(CI.isTailCall())); + pushValue(CI.getCalledValue(), InstID, Vals, VE, Stream); // Callee + + // Emit value #'s for the fixed parameters. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { + // Check for labels (can happen with asm labels). + if (FTy->getParamType(i)->isLabelTy()) + Vals.push_back(VE.getValueID(CI.getArgOperand(i))); + else + // fixed param. + pushValue(CI.getArgOperand(i), InstID, Vals, VE, Stream); + } + + // Emit type/value pairs for varargs params. + if (FTy->isVarArg()) { + for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands(); + i != e; ++i) + // varargs + pushValue(CI.getArgOperand(i), InstID, Vals, VE, Stream); + } + break; + } + case Instruction::VAArg: + Code = naclbitc::FUNC_CODE_INST_VAARG; + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // valist. + Vals.push_back(VE.getTypeID(I.getType())); // restype. + break; + } + + Stream.EmitRecord(Code, Vals, AbbrevToUse); + Vals.clear(); +} + +// Emit names for globals/functions etc. +static void WriteValueSymbolTable(const ValueSymbolTable &VST, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + if (VST.empty()) return; + Stream.EnterSubblock(naclbitc::VALUE_SYMTAB_BLOCK_ID); + + // FIXME: Set up the abbrev, we know how many values there are! + // FIXME: We know if the type names can use 7-bit ascii. + SmallVector<unsigned, 64> NameVals; + + for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end(); + SI != SE; ++SI) { + + const ValueName &Name = *SI; + + // Figure out the encoding to use for the name. 
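The scan that follows classifies each symbol name so the densest abbreviation can be chosen: char6 if every character fits the 6-bit set, plain 7-bit bytes if the name is pure ASCII, otherwise full 8-bit bytes. A standalone sketch of the same classification (character set per the char6 assumption noted earlier):

    #include <string>
    enum class NameEncoding { Char6, SevenBit, EightBit };
    NameEncoding classifyName(const std::string &Name) {
      bool Is7Bit = true, IsChar6 = true;
      for (unsigned char C : Name) {
        IsChar6 = IsChar6 && ((C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
                              (C >= '0' && C <= '9') || C == '.' || C == '_');
        if (C & 0x80) {       // high bit set: the name needs the 8-bit form
          Is7Bit = false;
          break;
        }
      }
      if (IsChar6) return NameEncoding::Char6;
      return Is7Bit ? NameEncoding::SevenBit : NameEncoding::EightBit;
    }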
+ bool is7Bit = true; + bool isChar6 = true; + for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength(); + C != E; ++C) { + if (isChar6) + isChar6 = NaClBitCodeAbbrevOp::isChar6(*C); + if ((unsigned char)*C & 128) { + is7Bit = false; + break; // don't bother scanning the rest. + } + } + + unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; + + // VST_ENTRY: [valueid, namechar x N] + // VST_BBENTRY: [bbid, namechar x N] + unsigned Code; + if (isa<BasicBlock>(SI->getValue())) { + Code = naclbitc::VST_CODE_BBENTRY; + if (isChar6) + AbbrevToUse = VST_BBENTRY_6_ABBREV; + } else { + Code = naclbitc::VST_CODE_ENTRY; + if (isChar6) + AbbrevToUse = VST_ENTRY_6_ABBREV; + else if (is7Bit) + AbbrevToUse = VST_ENTRY_7_ABBREV; + } + + NameVals.push_back(VE.getValueID(SI->getValue())); + for (const char *P = Name.getKeyData(), + *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P) + NameVals.push_back((unsigned char)*P); + + // Emit the finished record. + Stream.EmitRecord(Code, NameVals, AbbrevToUse); + NameVals.clear(); + } + Stream.ExitBlock(); +} + +/// WriteFunction - Emit a function body to the module stream. +static void WriteFunction(const Function &F, NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + Stream.EnterSubblock(naclbitc::FUNCTION_BLOCK_ID); + VE.incorporateFunction(F); + + SmallVector<unsigned, 64> Vals; + + // Emit the number of basic blocks, so the reader can create them ahead of + // time. + Vals.push_back(VE.getBasicBlocks().size()); + Stream.EmitRecord(naclbitc::FUNC_CODE_DECLAREBLOCKS, Vals); + Vals.clear(); + + // If there are function-local constants, emit them now. + unsigned CstStart, CstEnd; + VE.getFunctionConstantRange(CstStart, CstEnd); + WriteConstants(CstStart, CstEnd, VE, Stream, false); + + // Keep a running idea of what the instruction ID is. + unsigned InstID = CstEnd; + + // Finally, emit all the instructions, in order. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + WriteInstruction(*I, InstID, VE, Stream, Vals); + + if (!I->getType()->isVoidTy()) + ++InstID; + } + + // Emit names for all the instructions etc. + WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream); + + VE.purgeFunction(); + Stream.ExitBlock(); +} + +// Emit blockinfo, which defines the standard abbreviations etc. +static void WriteBlockInfo(const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + // We only want to emit block info records for blocks that have multiple + // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. + // Other blocks can define their abbrevs inline. + Stream.EnterBlockInfoBlock(); + + { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 3)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_ENTRY_8_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // 7-bit fixed width VST_ENTRY strings. 
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_ENTRY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 7)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_ENTRY_7_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // 6-bit char6 VST_ENTRY strings. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_ENTRY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_ENTRY_6_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // 6-bit char6 VST_BBENTRY strings. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_BBENTRY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_BBENTRY_6_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + + + { // SETTYPE abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_SETTYPE)); + Abbv->Add(NaClBitCodeAbbrevOp( + NaClBitCodeAbbrevOp::Fixed, + NaClBitsNeededForValue(VE.getTypes().size()))); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_SETTYPE_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // INTEGER abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_INTEGER)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_INTEGER_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // CE_CAST abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_CE_CAST)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 4)); // cast opc + Abbv->Add(NaClBitCodeAbbrevOp( + NaClBitCodeAbbrevOp::Fixed, // typeid + NaClBitsNeededForValue(VE.getTypes().size()))); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); // value id + + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_CE_CAST_Abbrev) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // NULL abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_NULL)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_NULL_Abbrev) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + // FIXME: This should only use space for first class types! + + { // INST_LOAD abbrev for FUNCTION_BLOCK. 
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_LOAD)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // Ptr + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 4)); // Align + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // volatile + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_LOAD_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_BINOP abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_BINOP)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // LHS + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // RHS + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 4)); // opc + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_BINOP_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_BINOP)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // LHS + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // RHS + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 4)); // opc + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 7)); // flags + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_CAST abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_CAST)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // OpVal + Abbv->Add(NaClBitCodeAbbrevOp( + NaClBitCodeAbbrevOp::Fixed, // dest ty + NaClBitsNeededForValue(VE.getTypes().size()))); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 4)); // opc + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_CAST_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // INST_RET abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_RET)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_RET_VOID_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_RET abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_RET)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // ValID + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_RET_VAL_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_UNREACHABLE)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_FORWARDTYPEREF abbrev for FUNCTION_BLOCK. 
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_FORWARDTYPEREF)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_FORWARDTYPEREF_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // VAR abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_VAR)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_VAR_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // COMPOUND abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_COMPOUND)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_COMPOUND_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // ZEROFILL abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_ZEROFILL)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_ZEROFILL_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // DATA abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_DATA)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_DATA_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // RELOC abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_RELOC)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_RELOC_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // RELOC_WITH_ADDEND_ABBREV abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_RELOC)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + if (Stream.EmitBlockInfoAbbrev( + naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_RELOC_WITH_ADDEND_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + Stream.ExitBlock(); +} + +/// WriteModule - Emit the specified module to the bitstream. +static void WriteModule(const Module *M, NaClBitstreamWriter &Stream) { + DEBUG(dbgs() << "-> WriteModule\n"); + Stream.EnterSubblock(naclbitc::MODULE_BLOCK_ID); + + SmallVector<unsigned, 1> Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(naclbitc::MODULE_CODE_VERSION, Vals); + + // Analyze the module, enumerating globals, functions, etc. + NaClValueEnumerator VE(M); + + // Emit blockinfo, which defines the standard abbreviations etc. + WriteBlockInfo(VE, Stream); + + // Emit information describing all of the types in the module. 
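Several of the abbreviation operands defined above are sized with NaClBitsNeededForValue(VE.getTypes().size()), i.e. a Fixed field just wide enough to hold the largest type ID; the type table itself is emitted next. A plausible equivalent of that width calculation, shown as an assumption about the helper rather than its actual implementation:

    #include <cstdint>
    // Width of a Fixed abbrev operand that must represent any value in [0, MaxValue].
    unsigned bitsNeededForValue(uint64_t MaxValue) {
      unsigned Bits = 1;          // even the value 0 needs one bit
      while (MaxValue >>= 1)
        ++Bits;
      return Bits;
    }
    // e.g. 5 types -> bitsNeededForValue(5) == 3, so type IDs use 3-bit fields.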
+  WriteTypeTable(VE, Stream);
+
+  // Emit top-level description of module, including inline asm,
+  // descriptors for global variables, and function prototype info.
+  WriteModuleInfo(M, VE, Stream);
+
+  // Emit constants.
+  WriteModuleConstants(VE, Stream);
+
+  // Emit names for globals/functions etc.
+  WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
+
+  // Emit function bodies.
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F)
+    if (!F->isDeclaration())
+      WriteFunction(*F, VE, Stream);
+
+  Stream.ExitBlock();
+  DEBUG(dbgs() << "<- WriteModule\n");
+}
+
+// Max size for variable fields. Currently only used for writing them
+// out to files (the parsing works for arbitrary sizes).
+static const size_t kMaxVariableFieldSize = 256;
+
+// Write out the given fields to the bitstream.
+static void WriteHeaderFields(
+    const std::vector<NaClBitcodeHeaderField*> &Fields,
+    NaClBitstreamWriter& Stream) {
+  // Emit placeholder for number of bytes used to hold header fields.
+  // This value is necessary so that the streamable reader can preallocate
+  // a buffer to read the fields.
+  Stream.Emit(0, naclbitc::BlockSizeWidth);
+  unsigned BytesForHeader = 0;
+
+  unsigned NumberFields = Fields.size();
+  if (NumberFields > 0xFFFF)
+    report_fatal_error("Too many header fields");
+
+  uint8_t Buffer[kMaxVariableFieldSize];
+  for (std::vector<NaClBitcodeHeaderField*>::const_iterator
+           Iter = Fields.begin(), IterEnd = Fields.end();
+       Iter != IterEnd; ++Iter) {
+    if (!(*Iter)->Write(Buffer, kMaxVariableFieldSize))
+      report_fatal_error("Header field too big to generate");
+    size_t limit = (*Iter)->GetTotalSize();
+    for (size_t i = 0; i < limit; i++) {
+      Stream.Emit(Buffer[i], 8);
+    }
+    BytesForHeader += limit;
+  }
+
+  if (BytesForHeader > 0xFFFF)
+    report_fatal_error("Header fields too big to save");
+
+  // Encode #fields in the bottom two bytes, and #bytes to hold fields in
+  // the top two bytes. Then backpatch into the second word.
+  unsigned Value = NumberFields | (BytesForHeader << 16);
+  Stream.BackpatchWord(NaClBitcodeHeader::WordSize, Value);
+}
+
+// Define the version of PNaCl bitcode we are generating.
+static const uint16_t kPNaClVersion = 1;
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+  SmallVector<char, 0> Buffer;
+  Buffer.reserve(256*1024);
+
+  // Emit the module into the buffer.
+  {
+    NaClBitstreamWriter Stream(Buffer);
+
+    // Emit the file header.
+    Stream.Emit((unsigned)'P', 8);
+    Stream.Emit((unsigned)'E', 8);
+    Stream.Emit((unsigned)'X', 8);
+    Stream.Emit((unsigned)'E', 8);
+
+    // Collect header fields to add.
+    {
+      std::vector<NaClBitcodeHeaderField*> HeaderFields;
+      HeaderFields.push_back(
+          new NaClBitcodeHeaderField(NaClBitcodeHeaderField::kPNaClVersion,
+                                     kPNaClVersion));
+      WriteHeaderFields(HeaderFields, Stream);
+    }
+
+    // Emit the module.
+    WriteModule(M, Stream);
+  }
+
+  // Write the generated bitstream to "Out".
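Putting the pieces above together, the first eight bytes of the emitted file are the 'PEXE' magic followed by one 32-bit word that WriteHeaderFields backpatches: the field count in its low 16 bits and the byte count of field data in its high 16 bits. A sketch of just that prefix, assuming the little-endian word order of the bitstream writer, before the buffer is flushed to Out below:

    #include <cstdint>
    #include <vector>
    std::vector<uint8_t> pexePrefix(uint16_t NumFields, uint16_t FieldBytes) {
      std::vector<uint8_t> Prefix = {'P', 'E', 'X', 'E'};
      uint32_t Word = uint32_t(NumFields) | (uint32_t(FieldBytes) << 16);
      for (int i = 0; i < 4; ++i)
        Prefix.push_back(uint8_t(Word >> (8 * i)));  // little-endian byte order
      return Prefix;
    }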
+ Out.write((char*)&Buffer.front(), Buffer.size()); +} diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp new file mode 100644 index 0000000000..5e2484a3f3 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp @@ -0,0 +1,443 @@ +//===-- NaClValueEnumerator.cpp ------------------------------------------===// +// Number values and types for bitcode writer +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the NaClValueEnumerator class. +// +//===----------------------------------------------------------------------===// + +#include "NaClValueEnumerator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <set> + +using namespace llvm; + +static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) { + return V.first->getType()->isIntOrIntVectorTy(); +} + +/// NaClValueEnumerator - Enumerate module-level information. +NaClValueEnumerator::NaClValueEnumerator(const Module *M) { + // Create map for counting frequency of types, and set field + // TypeCountMap accordingly. Note: Pointer field TypeCountMap is + // used to deal with the fact that types are added through various + // method calls in this routine. Rather than pass it as an argument, + // we use a field. The field is a pointer so that the memory + // footprint of count_map can be garbage collected when this + // constructor completes. + TypeCountMapType count_map; + TypeCountMap = &count_map; + + // Enumerate the functions. Note: We do this before global + // variables, so that global variable initializations can refer to + // the functions without a forward reference. + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + EnumerateValue(I); + } + + // Enumerate the global variables. + FirstGlobalVarID = Values.size(); + for (Module::const_global_iterator I = M->global_begin(), + E = M->global_end(); I != E; ++I) + EnumerateValue(I); + NumGlobalVarIDs = Values.size() - FirstGlobalVarID; + + // Enumerate the aliases. + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + EnumerateValue(I); + + // Remember what is the cutoff between globalvalue's and other constants. + unsigned FirstConstant = Values.size(); + + // Skip global variable initializers since they are handled within + // WriteGlobalVars of file NaClBitcodeWriter.cpp. + + // Enumerate the aliasees. + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + EnumerateValue(I->getAliasee()); + + // Insert constants that are named at module level into the slot + // pool so that the module symbol table can refer to them... + EnumerateValueSymbolTable(M->getValueSymbolTable()); + + // Enumerate types used by function bodies and argument lists. 
+ for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + EnumerateType(I->getType()); + + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){ + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + EnumerateOperandType(*OI); + } + EnumerateType(I->getType()); + } + } + + // Optimized type indicies to put "common" expected types in with small + // indices. + OptimizeTypes(M); + TypeCountMap = NULL; + + // Optimize constant ordering. + OptimizeConstants(FirstConstant, Values.size()); +} + +void NaClValueEnumerator::OptimizeTypes(const Module *M) { + + // Sort types by count, so that we can index them based on + // frequency. Use indices of built TypeMap, so that order of + // construction is repeatable. + std::set<unsigned> type_counts; + typedef std::set<unsigned> TypeSetType; + std::map<unsigned, TypeSetType> usage_count_map; + TypeList IdType(Types); + + for (TypeCountMapType::iterator iter = TypeCountMap->begin(); + iter != TypeCountMap->end(); ++ iter) { + type_counts.insert(iter->second); + usage_count_map[iter->second].insert(TypeMap[iter->first]-1); + } + + // Reset type tracking maps, so that we can re-enter based + // on fequency ordering. + TypeCountMap = NULL; + Types.clear(); + TypeMap.clear(); + + // Reinsert types, based on frequency. + for (std::set<unsigned>::reverse_iterator count_iter = type_counts.rbegin(); + count_iter != type_counts.rend(); ++count_iter) { + TypeSetType& count_types = usage_count_map[*count_iter]; + for (TypeSetType::iterator type_iter = count_types.begin(); + type_iter != count_types.end(); ++type_iter) + EnumerateType((IdType[*type_iter]), true); + } +} + +unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const { + InstructionMapType::const_iterator I = InstructionMap.find(Inst); + assert(I != InstructionMap.end() && "Instruction is not mapped!"); + return I->second; +} + +void NaClValueEnumerator::setInstructionID(const Instruction *I) { + InstructionMap[I] = InstructionCount++; +} + +unsigned NaClValueEnumerator::getValueID(const Value *V) const { + ValueMapType::const_iterator I = ValueMap.find(V); + assert(I != ValueMap.end() && "Value not in slotcalculator!"); + return I->second-1; +} + +void NaClValueEnumerator::dump() const { + print(dbgs(), ValueMap, "Default"); + dbgs() << '\n'; +} + +void NaClValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, + const char *Name) const { + + OS << "Map Name: " << Name << "\n"; + OS << "Size: " << Map.size() << "\n"; + for (ValueMapType::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + + const Value *V = I->first; + if (V->hasName()) + OS << "Value: " << V->getName(); + else + OS << "Value: [null]\n"; + V->dump(); + + OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):"; + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + if (UI != V->use_begin()) + OS << ","; + if((*UI)->hasName()) + OS << " " << (*UI)->getName(); + else + OS << " [null]"; + + } + OS << "\n\n"; + } +} + +// Optimize constant ordering. 
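A detail worth noting from getValueID above: the map stores every ID offset by one, so a default-constructed zero entry can mean "not yet enumerated" without a separate lookup. A minimal sketch of that idiom with simplified types; the constant-reordering predicate announced by the comment above follows it:

    #include <map>
    #include <vector>
    struct SimpleEnumerator {
      std::map<const void *, unsigned> Map;  // value -> ID + 1 (0 means absent)
      std::vector<const void *> Values;      // ID -> value
      unsigned enumerate(const void *V) {
        unsigned &Slot = Map[V];
        if (Slot == 0) {                     // first time we see V
          Values.push_back(V);
          Slot = Values.size();              // store ID + 1
        }
        return Slot - 1;                     // zero-based ID, as getValueID returns
      }
    };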
+namespace { + struct CstSortPredicate { + NaClValueEnumerator &VE; + explicit CstSortPredicate(NaClValueEnumerator &ve) : VE(ve) {} + bool operator()(const std::pair<const Value*, unsigned> &LHS, + const std::pair<const Value*, unsigned> &RHS) { + // Sort by plane. + if (LHS.first->getType() != RHS.first->getType()) + return VE.getTypeID(LHS.first->getType()) < + VE.getTypeID(RHS.first->getType()); + // Then by frequency. + return LHS.second > RHS.second; + } + }; +} + +/// OptimizeConstants - Reorder constant pool for denser encoding. +void NaClValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { + if (CstStart == CstEnd || CstStart+1 == CstEnd) return; + + CstSortPredicate P(*this); + std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P); + + // Ensure that integer and vector of integer constants are at the start of the + // constant pool. This is important so that GEP structure indices come before + // gep constant exprs. + std::partition(Values.begin()+CstStart, Values.begin()+CstEnd, + isIntOrIntVectorValue); + + // Rebuild the modified portion of ValueMap. + for (; CstStart != CstEnd; ++CstStart) + ValueMap[Values[CstStart].first] = CstStart+1; +} + + +/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol +/// table into the values table. +void NaClValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { + for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); + VI != VE; ++VI) + EnumerateValue(VI->getValue()); +} + +void NaClValueEnumerator::EnumerateValue(const Value *V) { + assert(!V->getType()->isVoidTy() && "Can't insert void values!"); + assert(!isa<MDNode>(V) && !isa<MDString>(V) && + "EnumerateValue doesn't handle Metadata!"); + + // Check to see if it's already in! + unsigned &ValueID = ValueMap[V]; + if (ValueID) { + // Increment use count. + Values[ValueID-1].second++; + return; + } + + // Enumerate the type of this value. Skip global values since no + // types are dumped for global variables. + if (!isa<GlobalVariable>(V)) + EnumerateType(V->getType()); + + if (const Constant *C = dyn_cast<Constant>(V)) { + if (isa<GlobalValue>(C)) { + // Initializers for globals are handled explicitly elsewhere. + } else if (C->getNumOperands()) { + // If a constant has operands, enumerate them. This makes sure that if a + // constant has uses (for example an array of const ints), that they are + // inserted also. + + // We prefer to enumerate them with values before we enumerate the user + // itself. This makes it more likely that we can avoid forward references + // in the reader. We know that there can be no cycles in the constants + // graph that don't go through a global variable. + for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); + I != E; ++I) + if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress. + EnumerateValue(*I); + + // Finally, add the value. Doing this could make the ValueID reference be + // dangling, don't reuse it. + Values.push_back(std::make_pair(V, 1U)); + ValueMap[V] = Values.size(); + return; + } + } + + // Add the value. + Values.push_back(std::make_pair(V, 1U)); + ValueID = Values.size(); +} + + +void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) { + // This function is used to enumerate types referenced by the given + // module. This function is called in two phases, based on the value + // of TypeCountMap. These phases are: + // + // (1) In this phase, InsideOptimizeTypes=false. 
We are collecting types + // and all corresponding (implicitly) referenced types. In addition, + // we are keeping track of the number of references to each type in + // TypeCountMap. These reference counts will be used by method + // OptimizeTypes to associate the smallest type ID's with the most + // referenced types. + // + // (2) In this phase, InsideOptimizeTypes=true. We are registering types + // based on frequency. To minimize type IDs for frequently used + // types, (unlike the other context) we are inserting the minimal + // (implicitly) referenced types needed for each type. + unsigned *TypeID = &TypeMap[Ty]; + + if (TypeCountMap) ++((*TypeCountMap)[Ty]); + + // We've already seen this type. + if (*TypeID) + return; + + // If it is a non-anonymous struct, mark the type as being visited so that we + // don't recursively visit it. This is safe because we allow forward + // references of these in the bitcode reader. + if (StructType *STy = dyn_cast<StructType>(Ty)) + if (!STy->isLiteral()) + *TypeID = ~0U; + + // If in the second phase (i.e. inside optimize types), don't expand + // pointers to structures, since we can just generate a forward + // reference to it. This way, we don't use up unnecessary (small) ID + // values just to define the pointer. + bool EnumerateSubtypes = true; + if (InsideOptimizeTypes) + if (PointerType *PTy = dyn_cast<PointerType>(Ty)) + if (StructType *STy = dyn_cast<StructType>(PTy->getElementType())) + if (!STy->isLiteral()) + EnumerateSubtypes = false; + + // Enumerate all of the subtypes before we enumerate this type. This ensures + // that the type will be enumerated in an order that can be directly built. + if (EnumerateSubtypes) { + for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); + I != E; ++I) + EnumerateType(*I, InsideOptimizeTypes); + } + + // Refresh the TypeID pointer in case the table rehashed. + TypeID = &TypeMap[Ty]; + + // Check to see if we got the pointer another way. This can happen when + // enumerating recursive types that hit the base case deeper than they start. + // + // If this is actually a struct that we are treating as forward ref'able, + // then emit the definition now that all of its contents are available. + if (*TypeID && *TypeID != ~0U) + return; + + // Add this type now that its contents are all happily enumerated. + Types.push_back(Ty); + + *TypeID = Types.size(); +} + +// Enumerate the types for the specified value. If the value is a constant, +// walk through it, enumerating the types of the constant. +void NaClValueEnumerator::EnumerateOperandType(const Value *V) { + EnumerateType(V->getType()); + + if (const Constant *C = dyn_cast<Constant>(V)) { + // If this constant is already enumerated, ignore it, we know its type must + // be enumerated. + if (ValueMap.count(V)) return; + + // This constant may have operands, make sure to enumerate the types in + // them. + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + const Value *Op = C->getOperand(i); + + // Don't enumerate basic blocks here, this happens as operands to + // blockaddress. + if (isa<BasicBlock>(Op)) continue; + + EnumerateOperandType(Op); + } + } +} + +void NaClValueEnumerator::incorporateFunction(const Function &F) { + InstructionCount = 0; + NumModuleValues = Values.size(); + + // Make sure no insertions outside of a function. + assert(FnForwardTypeRefs.empty()); + + // Adding function arguments to the value table. 
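incorporateFunction, begun above, appends the function's values in contiguous ranges after the module-level values: arguments first (the loop that follows), then function-local constants, then instruction results. An illustrative summary using the enumerator's own field names, with the range boundaries stated as implied by the code rather than quoted from it:

    // ID layout while one function is incorporated:
    //   [0, NumModuleValues)                      module-level values
    //   [NumModuleValues, FirstFuncConstantID)    the function's arguments
    //   [FirstFuncConstantID, FirstInstID)        function-local constants
    //   [FirstInstID, ...)                        instruction results
    struct FunctionIDRanges {
      unsigned NumModuleValues, FirstFuncConstantID, FirstInstID;
      bool isFunctionLocalConstant(unsigned ID) const {
        return ID >= FirstFuncConstantID && ID < FirstInstID;
      }
    };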
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I) + EnumerateValue(I); + + FirstFuncConstantID = Values.size(); + + // Add all function-level constants to the value table. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) || + isa<InlineAsm>(*OI)) + EnumerateValue(*OI); + } + BasicBlocks.push_back(BB); + ValueMap[BB] = BasicBlocks.size(); + } + + // Optimize the constant layout. + OptimizeConstants(FirstFuncConstantID, Values.size()); + + FirstInstID = Values.size(); + + // Add all of the instructions. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { + if (!I->getType()->isVoidTy()) + EnumerateValue(I); + } + } +} + +void NaClValueEnumerator::purgeFunction() { + /// Remove purged values from the ValueMap. + for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) + ValueMap.erase(Values[i].first); + for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) + ValueMap.erase(BasicBlocks[i]); + + Values.resize(NumModuleValues); + BasicBlocks.clear(); + FnForwardTypeRefs.clear(); +} + +static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, + DenseMap<const BasicBlock*, unsigned> &IDMap) { + unsigned Counter = 0; + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + IDMap[BB] = ++Counter; +} + +/// getGlobalBasicBlockID - This returns the function-specific ID for the +/// specified basic block. This is relatively expensive information, so it +/// should only be used by rare constructs such as address-of-label. +unsigned NaClValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const { + unsigned &Idx = GlobalBasicBlockIDs[BB]; + if (Idx != 0) + return Idx-1; + + IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs); + return getGlobalBasicBlockID(BB); +} diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h new file mode 100644 index 0000000000..e98a091ba8 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h @@ -0,0 +1,157 @@ +//===-- Bitcode/NaCl/Writer/NaClValueEnumerator.h - ----------*- C++ -*-===// +// Number values. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class gives values and types Unique ID's. +// +//===----------------------------------------------------------------------===// + +#ifndef NACL_VALUE_ENUMERATOR_H +#define NACL_VALUE_ENUMERATOR_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include <vector> + +namespace llvm { + +class Type; +class Value; +class Instruction; +class BasicBlock; +class Function; +class Module; +class ValueSymbolTable; +class raw_ostream; + +class NaClValueEnumerator { +public: + typedef std::vector<Type*> TypeList; + + // For each value, we remember its Value* and occurrence frequency. + typedef std::vector<std::pair<const Value*, unsigned> > ValueList; +private: + // Defines unique ID's for each type. 
+ typedef DenseMap<Type*, unsigned> TypeMapType; + TypeMapType TypeMap; + // Defines the number of references to each type. If defined, + // we are in the first pass of collecting types, and reference counts + // should be added to the map. If undefined, we are in the second pass + // that actually assigns type IDs, based on frequency counts found in + // the first pass. + typedef TypeMapType TypeCountMapType; + TypeCountMapType* TypeCountMap; + + TypeList Types; + + typedef DenseMap<const Value*, unsigned> ValueMapType; + ValueMapType ValueMap; + ValueList Values; + + /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by + /// the "getGlobalBasicBlockID" method. + mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs; + + typedef DenseMap<const Instruction*, unsigned> InstructionMapType; + InstructionMapType InstructionMap; + unsigned InstructionCount; + + /// BasicBlocks - This contains all the basic blocks for the currently + /// incorporated function. Their reverse mapping is stored in ValueMap. + std::vector<const BasicBlock*> BasicBlocks; + + /// When a function is incorporated, this is the size of the Values list + /// before incorporation. + unsigned NumModuleValues; + + unsigned FirstFuncConstantID; + unsigned FirstInstID; + + /// Holds values that have been forward referenced within a function. + /// Used to make sure we don't generate more forward reference declarations + /// than necessary. + SmallSet<unsigned, 32> FnForwardTypeRefs; + + // The index of the first global variable ID in the bitcode file. + unsigned FirstGlobalVarID; + // The number of global variable IDs defined in the bitcode file. + unsigned NumGlobalVarIDs; + + NaClValueEnumerator(const NaClValueEnumerator &) LLVM_DELETED_FUNCTION; + void operator=(const NaClValueEnumerator &) LLVM_DELETED_FUNCTION; +public: + NaClValueEnumerator(const Module *M); + + void dump() const; + void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; + + unsigned getFirstGlobalVarID() const { + return FirstGlobalVarID; + } + + unsigned getNumGlobalVarIDs() const { + return NumGlobalVarIDs; + } + + unsigned getValueID(const Value *V) const; + + unsigned getTypeID(Type *T) const { + TypeMapType::const_iterator I = TypeMap.find(T); + assert(I != TypeMap.end() && "Type not in NaClValueEnumerator!"); + return I->second-1; + } + + unsigned getInstructionID(const Instruction *I) const; + void setInstructionID(const Instruction *I); + + /// getFunctionConstantRange - Return the range of values that corresponds to + /// function-local constants. + void getFunctionConstantRange(unsigned &Start, unsigned &End) const { + Start = FirstFuncConstantID; + End = FirstInstID; + } + + /// \brief Inserts the give value into the set of known function forward + /// value type refs. Returns true if the value id is added to the set. + bool InsertFnForwardTypeRef(unsigned ValID) { + return FnForwardTypeRefs.insert(ValID); + } + + const ValueList &getValues() const { return Values; } + const TypeList &getTypes() const { return Types; } + const std::vector<const BasicBlock*> &getBasicBlocks() const { + return BasicBlocks; + } + + /// getGlobalBasicBlockID - This returns the function-specific ID for the + /// specified basic block. This is relatively expensive information, so it + /// should only be used by rare constructs such as address-of-label. 
+ unsigned getGlobalBasicBlockID(const BasicBlock *BB) const; + + /// incorporateFunction/purgeFunction - If you'd like to deal with a function, + /// use these two methods to get its data into the NaClValueEnumerator! + /// + void incorporateFunction(const Function &F); + void purgeFunction(); + +private: + void OptimizeTypes(const Module *M); + void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + + void EnumerateValue(const Value *V); + void EnumerateType(Type *T, bool InsideOptimizeTypes=false); + void EnumerateOperandType(const Value *V); + + void EnumerateValueSymbolTable(const ValueSymbolTable &ST); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index e6ff4b43b1..a7434938b4 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1610,6 +1610,14 @@ bool BitcodeReader::ParseModule(bool Resume) { std::string S; if (ConvertToString(Record, 0, S)) return Error("Invalid MODULE_CODE_TRIPLE record"); + + // @LOCALMOD-BEGIN + // This hack is needed in order to get Clang compiled binaries + // working with the Gold plugin, until PNaCl backend is introduced + // in lib/Target/PNaCl. + if (S == "le32-unknown-nacl") + S = "armv7-none-linux-gnueabi"; + // @LOCALMOD-END TheModule->setTargetTriple(S); break; } @@ -2919,6 +2927,16 @@ bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { const Function *F = dyn_cast<Function>(GV); if (!F || F->isDeclaration()) return false; + // @LOCALMOD-START + // Don't dematerialize functions with BBs which have their address taken; + // it will cause any referencing blockAddress constants to also be destroyed, + // but because they are GVs, they need to stay around until PassManager + // finalization. + for (Function::const_iterator BB = F->begin(); BB != F->end(); ++BB) { + if (BB->hasAddressTaken()) + return false; + } + // @LOCALMOD-END return DeferredFunctionInfo.count(const_cast<Function*>(F)); } @@ -3068,6 +3086,9 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, return 0; } R->setBufferOwned(false); // no buffer to delete + + R->materializeForwardReferencedFunctions(); + return M; } diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 76ebe9aca9..e2a5c54756 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -15,3 +15,4 @@ add_subdirectory(ExecutionEngine) add_subdirectory(Target) add_subdirectory(AsmParser) add_subdirectory(Archive) +add_subdirectory(Wrap) diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 84162ace41..c73071e12b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -159,6 +159,11 @@ bool AsmPrinter::doInitialization(Module &M) { MMI = getAnalysisIfAvailable<MachineModuleInfo>(); MMI->AnalyzeModule(M); + // @LOCALMOD-BEGIN + IsPlainObject = + (MMI->getModule()->getOutputFormat() == Module::ObjectOutputFormat); + // @LOCALMOD-END + // Initialize TargetLoweringObjectFile. const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) .Initialize(OutContext, TM); @@ -275,6 +280,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *GVSym = Mang->getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + // @LOCALMOD-BEGIN + // For .pexe and .pso files, emit ELF type STT_OBJECT or STT_TLS instead + // of NOTYPE for undefined symbols. 
+ // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2527 + if (!GV->hasInitializer() && !IsPlainObject) { + OutStreamer.EmitSymbolAttribute(GVSym, + GV->isThreadLocal() ? MCSA_ELF_TypeTLS + : MCSA_ELF_TypeObject); + } + // @LOCALMOD-END + if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -689,9 +705,14 @@ void AsmPrinter::EmitFunctionBody() { break; case TargetOpcode::EH_LABEL: - case TargetOpcode::GC_LABEL: + case TargetOpcode::GC_LABEL: { + // @LOCALMOD-START + unsigned LabelAlign = GetTargetLabelAlign(II); + if (LabelAlign) EmitAlignment(LabelAlign); + // @LOCALMOD-END OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); break; + } case TargetOpcode::INLINEASM: EmitInlineAsm(II); break; @@ -856,6 +877,16 @@ bool AsmPrinter::doFinalization(Module &M) { const Function &F = *I; if (!F.isDeclaration()) continue; + + // @LOCALMOD-BEGIN + // For .pexe and .pso files, emit STT_FUNC for function declarations. + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2527 + if (!IsPlainObject) { + OutStreamer.EmitSymbolAttribute(Mang->getSymbol(&F), + MCSA_ELF_TypeFunction); + } + // @LOCALMOD-END + GlobalValue::VisibilityTypes V = F.getVisibility(); if (V == GlobalValue::DefaultVisibility) continue; @@ -1075,12 +1106,25 @@ void AsmPrinter::EmitJumpTableInfo() { if (// In PIC mode, we need to emit the jump table to the same section as the // function body itself, otherwise the label differences won't make sense. // FIXME: Need a better predicate for this: what about custom entries? - MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || + (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 || // We should also do if the section name is NULL or function is declared // in discardable section // FIXME: this isn't the right predicate, should be based on the MCSection // for the function. - F->isWeakForLinker()) { + // @LOCALMOD-START + // The original code is a hack: + // jumptables usually end up in .rodata, + // but for functions with weak linkage there is a chance that they are + // not needed. So in order to discard the function AND the jumptable + // together, we keep them both in .text. This fix only works if we never discard + // weak functions. This is guaranteed because the bitcode linker already + // throws out unused ones. + // TODO: Investigate the other case of concern -- PIC code. + // Concern is about jumptables being in a different section: can the + // rodata and text be too far apart for a RIP-relative offset? + F->isWeakForLinker()) + && !UseReadOnlyJumpTables()) { + // @LOCALMOD-END OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); } else { // Otherwise, drop it in the readonly section. @@ -1107,7 +1151,7 @@ void AsmPrinter::EmitJumpTableInfo() { // .set directive for each unique entry. This reduces the number of // relocations the assembler will generate for the jump table. if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && - MAI->hasSetDirective()) { + MAI->hasSetDirective() && !UseReadOnlyJumpTables()) { // @LOCALMOD SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; const TargetLowering *TLI = TM.getTargetLowering(); const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); @@ -1190,7 +1234,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If we have emitted set directives for the jump table entries, print // them rather than the entries themselves.
If we're emitting PIC, then // emit the table entries as differences between two text section labels. - if (MAI->hasSetDirective()) { + if (MAI->hasSetDirective() && !UseReadOnlyJumpTables()) { // @LOCALMOD // If we used .set, reference the .set's symbol. Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()), OutContext); @@ -1210,7 +1254,6 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, OutStreamer.EmitValue(Value, EntrySize); } - /// EmitSpecialLLVMGlobal - Check to see if the specified global is a /// special global used by LLVM. If so, emit it and return true, otherwise /// do nothing and return false. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index abfa330fa2..e2425d742b 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -70,6 +70,11 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, InlineAsm::AsmDialect Dialect) const { +#if defined(__native_client__) + // Prune the generic AsmParser bits from the in-browser translator. + // This is normally used to parse inline asm (see createMCAsmParser below). + return; +#else assert(!Str.empty() && "Can't emit empty inline asm block"); // Remember if the buffer is nul terminated or not so we can avoid a copy. @@ -135,6 +140,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode, /*NoFinalize*/ true); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); +#endif // defined(__native_client__) } static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, @@ -411,6 +417,10 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, /// EmitInlineAsm - This method formats and emits the specified machine /// instruction that is an inline asm. void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { +#if defined(__native_client__) + // See above LOCALMOD for pruning the generic AsmParser bits. + return; +#else assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); // Count the number of register definitions to find the asm string. @@ -480,6 +490,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+ MAI->getInlineAsmEnd()); +#endif // __native_client__ } @@ -550,4 +561,3 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, // Target doesn't support this yet!
return true; } - diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f8cc3b3999..43b5c7d78d 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineConstantPool.h" // @LOCALMOD #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -233,6 +234,21 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, } } + // @LOCALMOD-START + // This is currently only used on ARM targets, where the ConstantPool + // subclass overloads getJumpTableIndex() + const std::vector<MachineConstantPoolEntry>& CPs = + MF.getConstantPool()->getConstants(); + for (unsigned i = 0, e = CPs.size(); i != e; ++i) { + if (!CPs[i].isMachineConstantPoolEntry()) continue; + unsigned *JTIndex = CPs[i].Val.MachineCPVal->getJumpTableIndex(); + if (!JTIndex) continue; + // Remember that this JT is live. + JTIsLive.set(*JTIndex); + } + // @LOCALMOD-END + + // Finally, remove dead jump tables. This happens when the // indirect jump was unreachable (and thus deleted). for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 75f4b96e3b..d4cc1a8654 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -33,6 +33,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, StackOffset = 0; clearByValRegsInfo(); + clearHasByValInRegPosition(); // @LOCALMOD. UsedRegs.resize((TRI.getNumRegs()+31)/32); } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 78e9950e5e..71a377df09 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -170,7 +170,8 @@ MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator B = begin(), E = end(), I = E; - while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + while (I != B && ((--I)->isTerminator() || I->isDebugValue() + || I->getOpcode() == TargetOpcode::BUNDLE_UNLOCK)) // @LOCALMOD ; /*noop */ while (I != E && !I->isTerminator()) ++I; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 959dd7df58..337b9790a5 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" // @LOCALMOD (upstreamable) #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/InlineAsm.h" @@ -214,7 +215,9 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (F.getRegInfo().isPhysRegUsed(Reg)) { + // @LOCALMOD (but upstreamable) + // Functions which call __builtin_unwind_init get all their registers saved. + if (F.getRegInfo().isPhysRegUsed(Reg) || F.getMMI().callsUnwindInit()) { // If the reg is modified, save it!
CSI.push_back(CalleeSavedInfo(Reg)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 67db211ec4..9d02fc7323 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5191,6 +5191,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::donothing: // ignore return 0; + // @LOCALMOD-BEGIN + // Native Client Intrinsics for TLS setup / layout. + case Intrinsic::nacl_tp_tls_offset: { + SDValue tls_size = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::NACL_TP_TLS_OFFSET, dl, + tls_size.getValueType(), + tls_size)); + return 0; + } + case Intrinsic::nacl_tp_tdb_offset: { + SDValue tdb_size = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(ISD::NACL_TP_TDB_OFFSET, dl, + tdb_size.getValueType(), + tdb_size)); + return 0; + } + case Intrinsic::nacl_target_arch: { + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::NACL_TARGET_ARCH, dl, DestVT)); + return 0; + } + // @LOCALMOD-END } } @@ -6376,7 +6398,10 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - TD.getABITypeAlignment(I.getType())); +// @LOCALMOD-BEGIN + TD.getCallFrameTypeAlignment(I.getType())); +// @LOCALMOD-END + setValue(&I, V); DAG.setRoot(V.getValue(1)); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 47b0391850..50b5bccf7c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -313,6 +313,13 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE: return "setfalse"; case ISD::SETFALSE2: return "setfalse2"; } + + // @LOCALMOD-BEGIN + // NaCl intrinsics for TLS setup + case ISD::NACL_TP_TLS_OFFSET: return "nacl_tls_offset"; + case ISD::NACL_TP_TDB_OFFSET: return "nacl_tdb_offset"; + case ISD::NACL_TARGET_ARCH: return "nacl_target_arch"; + // @LOCALMOD-END } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index e21f26e91c..02b838234d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -585,7 +585,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. @@ -614,7 +613,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); - // Run the DAG combiner in post-type-legalize mode. { NamedRegionTimer T("DAG Combining after legalize types", GroupName, @@ -636,10 +634,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } - if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); - // Run the DAG combiner in post-type-legalize mode. 
{ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, @@ -650,19 +646,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); - // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index a789a2596d..4b7e4609e5 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -428,17 +428,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } // Create the interval of the blocks that we previously found to be 'alive'. - BitVector Alive = BlockLiveness[MBB].LiveIn; - Alive |= BlockLiveness[MBB].LiveOut; - - if (Alive.any()) { - for (int pos = Alive.find_first(); pos != -1; - pos = Alive.find_next(pos)) { - if (!Starts[pos].isValid()) - Starts[pos] = Indexes->getMBBStartIdx(MBB); - if (!Finishes[pos].isValid()) - Finishes[pos] = Indexes->getMBBEndIdx(MBB); - } + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(MBB); + } + for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; + pos = MBBLiveness.LiveOut.find_next(pos)) { + Finishes[pos] = Indexes->getMBBEndIdx(MBB); } for (unsigned i = 0; i < NumSlots; ++i) { diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 7e7359a8fe..f91688531b 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -54,8 +54,16 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, case dwarf::DW_EH_PE_absptr: return Mang->getSymbol(GV); case dwarf::DW_EH_PE_pcrel: { + // @LOCALMOD-BEGIN + // The dwarf section label should not include the version suffix. + // Strip it off here. + StringRef Name = Mang->getSymbol(GV)->getName(); + size_t atpos = Name.find("@"); + if (atpos != StringRef::npos) + Name = Name.substr(0, atpos); + // @LOCALMOD-END return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + - Mang->getSymbol(GV)->getName()); + Name); // @LOCALMOD } } } @@ -64,7 +72,15 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); - NameData += Sym->getName(); + // @LOCALMOD-BEGIN + // The dwarf section label should not include the version suffix. + // Strip it off here. 
+ StringRef Name = Sym->getName(); + size_t atpos = Name.find("@"); + if (atpos != StringRef::npos) + Name = Name.substr(0, atpos); + // @LOCALMOD-END + NameData += Name; // @LOCALMOD MCSymbol *Label = getContext().GetOrCreateSymbol(NameData); Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); Streamer.EmitSymbolAttribute(Label, MCSA_Weak); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index bef4bbf660..b08de554e3 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -322,7 +322,9 @@ GenericValue lle_X_abort(FunctionType *FT, const std::vector<GenericValue> &Args) { //FIXME: should we report or raise here? //report_fatal_error("Interpreted program raised SIGABRT"); - raise (SIGABRT); + //TODO(dschuff) fixme or figure out how to get raise() + abort(); // @LOCALMOD + //raise (SIGABRT); return GenericValue(); } diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 7761127d55..994cb534f5 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -2168,6 +2168,7 @@ void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { SlotTracker SlotTable(this); formatted_raw_ostream OS(ROS); AssemblyWriter W(OS, SlotTable, this, AAW); + convertLibraryListToMetadata(); // @LOCALMOD W.printModule(this); } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 2c6971c83e..2d67c0c943 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -1391,7 +1391,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { BasicBlock *NewBB = getBasicBlock(); if (U == &Op<0>()) - NewF = cast<Function>(To); + NewF = cast<Function>(To->stripPointerCasts()); // @LOCALMOD else NewBB = cast<BasicBlock>(To); diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 6d547f3edf..b0deda9906 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -46,6 +46,116 @@ void GlobalValue::destroyConstant() { llvm_unreachable("You can't GV->destroyConstant()!"); } +// @LOCALMOD-BEGIN + +// Extract the version information from GV. +static void ExtractVersion(const GlobalValue *GV, + StringRef *Name, + StringRef *Ver, + bool *IsDefault) { + // The version information is stored in the GlobalValue's name, e.g.: + // + // GV Name Name Ver IsDefault + // ------------------------------------ + // foo@@V1 --> foo V1 true + // bar@V2 --> bar V2 false + // baz --> baz false + + StringRef GVName = GV->getName(); + size_t atpos = GVName.find("@"); + if (atpos == StringRef::npos) { + *Name = GVName; + *Ver = ""; + *IsDefault = false; + return; + } + *Name = GVName.substr(0, atpos); + ++atpos; + if (atpos < GVName.size() && GVName[atpos] == '@') { + *IsDefault = true; + ++atpos; + } else { + *IsDefault = false; + } + *Ver = GVName.substr(atpos); +} + +// Set the version information on GV. +static void SetVersion(Module *M, + GlobalValue *GV, + StringRef Ver, + bool IsDefault) { + StringRef Name; + StringRef PrevVersion; + bool PrevIsDefault; + ExtractVersion(GV, &Name, &PrevVersion, &PrevIsDefault); + + // If this symbol already has a version, make sure it matches. + if (!PrevVersion.empty()) { + if (!PrevVersion.equals(Ver) || PrevIsDefault != IsDefault) { + llvm_unreachable("Trying to override symbol version info!"); + } + return; + } + // If there's no version to set, there's nothing to do. + if (Ver.empty()) + return; + + // Make sure the versioned symbol name doesn't already exist. 
+ std::string NewName = Name.str() + (IsDefault ? "@@" : "@") + Ver.str(); + if (M->getNamedValue(NewName)) { + // It may make sense to do this as long as one of the globals being + // merged is only a declaration. But since this situation seems to be + // a corner case, for now it is unimplemented. + llvm_unreachable("Merging unversioned global into " + "existing versioned global is unimplemented"); + } + GV->setName(NewName); +} + +StringRef GlobalValue::getUnversionedName() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + return Name; +} + +StringRef GlobalValue::getVersion() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + return Ver; +} + +bool GlobalValue::isDefaultVersion() const { + StringRef Name; + StringRef Ver; + bool IsDefaultVersion; + ExtractVersion(this, &Name, &Ver, &IsDefaultVersion); + // It is an error to call this function on an unversioned symbol. + assert(!Ver.empty()); + return IsDefaultVersion; +} + +void GlobalValue::setVersionDef(StringRef Version, bool IsDefault) { + // This call only makes sense for definitions. + assert(!isDeclaration()); + SetVersion(Parent, this, Version, IsDefault); +} + +void GlobalValue::setNeeded(StringRef Version, StringRef DynFile) { + // This call makes sense on declarations or + // available-externally definitions. + // TODO(pdox): If this is a definition, should we turn it + // into a declaration here? + assert(isDeclaration() || hasAvailableExternallyLinkage()); + SetVersion(Parent, this, Version, false); + Parent->addNeededRecord(DynFile, this); +} +// @LOCALMOD-END + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a GlobalValue) from the GlobalValue Src to this one. 
void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 8affcc9469..4cb93d1fe6 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -23,6 +23,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Support/LeakDetector.h" +#include "llvm/Support/ErrorHandling.h" // @LOCALMOD #include <algorithm> #include <cstdarg> #include <cstdlib> @@ -55,6 +56,7 @@ Module::~Module() { GlobalList.clear(); FunctionList.clear(); AliasList.clear(); + LibraryList.clear(); // @LOCALMOD NamedMDList.clear(); delete ValSymTab; delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab); @@ -449,3 +451,228 @@ void Module::dropAllReferences() { for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I) I->dropAllReferences(); } + +// @LOCALMOD-BEGIN +void Module::convertMetadataToLibraryList() { + LibraryList.clear(); + // Get the DepLib node + NamedMDNode *Node = getNamedMetadata("DepLibs"); + if (!Node) + return; + for (unsigned i = 0; i < Node->getNumOperands(); i++) { + MDString* Mds = dyn_cast_or_null<MDString>( + Node->getOperand(i)->getOperand(0)); + assert(Mds && "Bad NamedMetadata operand"); + LibraryList.push_back(Mds->getString()); + } + // Clear the metadata so the linker won't try to merge it + Node->dropAllReferences(); +} + +void Module::convertLibraryListToMetadata() const { + if (LibraryList.size() == 0) + return; + // Get the DepLib node + NamedMDNode *Node = getNamedMetadata("DepLibs"); + assert(Node && "DepLibs metadata node missing"); + // Erase all existing operands + Node->dropAllReferences(); + // Add all libraries from the library list + for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) { + MDString *value = MDString::get(getContext(), *I); + Node->addOperand(MDNode::get(getContext(), + makeArrayRef(static_cast<Value*>(value)))); + } +} + +void Module::addLibrary(StringRef Lib) { + for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) + if (*I == Lib) + return; + LibraryList.push_back(Lib); + // If the module previously had no deplibs, it may not have the metadata node. 
+ // Ensure it exists now, so that we don't have to create it in + // convertLibraryListToMetadata (which is const) + getOrInsertNamedMetadata("DepLibs"); +} + +void Module::removeLibrary(StringRef Lib) { + LibraryListType::iterator I = LibraryList.begin(); + LibraryListType::iterator E = LibraryList.end(); + for (;I != E; ++I) + if (*I == Lib) { + LibraryList.erase(I); + return; + } +} + +static std::string +ModuleMetaGet(const Module *module, StringRef MetaName) { + NamedMDNode *node = module->getNamedMetadata(MetaName); + if (node == NULL) + return ""; + assert(node->getNumOperands() == 1); + MDNode *subnode = node->getOperand(0); + assert(subnode->getNumOperands() == 1); + MDString *value = dyn_cast<MDString>(subnode->getOperand(0)); + assert(value != NULL); + return value->getString(); +} + +static void +ModuleMetaSet(Module *module, StringRef MetaName, StringRef ValueStr) { + NamedMDNode *node = module->getNamedMetadata(MetaName); + if (node) + module->eraseNamedMetadata(node); + node = module->getOrInsertNamedMetadata(MetaName); + MDString *value = MDString::get(module->getContext(), ValueStr); + node->addOperand(MDNode::get(module->getContext(), + makeArrayRef(static_cast<Value*>(value)))); +} + +const std::string &Module::getSOName() const { + if (ModuleSOName == "") + ModuleSOName.assign(ModuleMetaGet(this, "SOName")); + return ModuleSOName; +} + +void Module::setSOName(StringRef Name) { + ModuleMetaSet(this, "SOName", Name); + ModuleSOName = Name; +} + +void Module::setOutputFormat(Module::OutputFormat F) { + const char *formatStr; + switch (F) { + case ObjectOutputFormat: formatStr = "object"; break; + case SharedOutputFormat: formatStr = "shared"; break; + case ExecutableOutputFormat: formatStr = "executable"; break; + default: + llvm_unreachable("Unrecognized output format in setOutputFormat()"); + } + ModuleMetaSet(this, "OutputFormat", formatStr); +} + +Module::OutputFormat Module::getOutputFormat() const { + std::string formatStr = ModuleMetaGet(this, "OutputFormat"); + if (formatStr == "" || formatStr == "object") + return ObjectOutputFormat; + else if (formatStr == "shared") + return SharedOutputFormat; + else if (formatStr == "executable") + return ExecutableOutputFormat; + llvm_unreachable("Invalid module compile type in getOutputFormat()"); +} + +void +Module::wrapSymbol(StringRef symName) { + std::string wrapSymName("__wrap_"); + wrapSymName += symName; + + std::string realSymName("__real_"); + realSymName += symName; + + GlobalValue *SymGV = getNamedValue(symName); + GlobalValue *WrapGV = getNamedValue(wrapSymName); + GlobalValue *RealGV = getNamedValue(realSymName); + + // Replace uses of "sym" with __wrap_sym. + if (SymGV) { + if (!WrapGV) + WrapGV = cast<GlobalValue>(getOrInsertGlobal(wrapSymName, + SymGV->getType())); + SymGV->replaceAllUsesWith(ConstantExpr::getBitCast(WrapGV, + SymGV->getType())); + } + + // Replace uses of "__real_sym" with "sym". + if (RealGV) { + if (!SymGV) + SymGV = cast<GlobalValue>(getOrInsertGlobal(symName, RealGV->getType())); + RealGV->replaceAllUsesWith(ConstantExpr::getBitCast(SymGV, + RealGV->getType())); + } +} + +// The metadata key prefix for NeededRecords. 
+static const char *NeededPrefix = "NeededRecord_"; + +void +Module::dumpMeta(raw_ostream &OS) const { + OS << "OutputFormat: "; + switch (getOutputFormat()) { + case Module::ObjectOutputFormat: OS << "object"; break; + case Module::SharedOutputFormat: OS << "shared"; break; + case Module::ExecutableOutputFormat: OS << "executable"; break; + } + OS << "\n"; + OS << "SOName: " << getSOName() << "\n"; + for (Module::lib_iterator L = lib_begin(), + E = lib_end(); + L != E; ++L) { + OS << "NeedsLibrary: " << (*L) << "\n"; + } + std::vector<NeededRecord> NList; + getNeededRecords(&NList); + for (unsigned i = 0; i < NList.size(); ++i) { + const NeededRecord &NR = NList[i]; + OS << StringRef(NeededPrefix) << NR.DynFile << ": "; + for (unsigned j = 0; j < NR.Symbols.size(); ++j) { + if (j != 0) + OS << " "; + OS << NR.Symbols[j]; + } + OS << "\n"; + } +} + +void Module::addNeededRecord(StringRef DynFile, GlobalValue *GV) { + if (DynFile.empty()) { + // We never resolved this symbol, even after linking. + // This should only happen in a shared object. + // It is safe to ignore this symbol, and let the dynamic loader + // figure out where it comes from. + return; + } + std::string Key = NeededPrefix; + Key += DynFile; + // Get the node for this file. + NamedMDNode *Node = getOrInsertNamedMetadata(Key); + // Add this global value's name to the list. + MDString *value = MDString::get(getContext(), GV->getName()); + Node->addOperand(MDNode::get(getContext(), + makeArrayRef(static_cast<Value*>(value)))); +} + +// Get the NeededRecord for SOName. +// Returns an empty NeededRecord if there was no metadata found. +static void getNeededRecordFor(const Module *M, + StringRef SOName, + Module::NeededRecord *NR) { + NR->DynFile = SOName; + NR->Symbols.clear(); + + std::string Key = NeededPrefix; + Key += SOName; + NamedMDNode *Node = M->getNamedMetadata(Key); + if (!Node) + return; + + for (unsigned k = 0; k < Node->getNumOperands(); ++k) { + // Insert the symbol name. + const MDString *SymName = + dyn_cast<MDString>(Node->getOperand(k)->getOperand(0)); + NR->Symbols.push_back(SymName->getString()); + } +} + +// Place the complete list of needed records in NeededOut. +void Module::getNeededRecords(std::vector<NeededRecord> *NeededOut) const { + // Iterate through the libraries needed, grabbing each NeededRecord. 
+ for (lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I) { + NeededRecord NR; + getNeededRecordFor(this, *I, &NR); + NeededOut->push_back(NR); + } +} +// @LOCALMOD-END diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index eeec14e834..7e42eef571 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -10,6 +10,7 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Assembly/Parser.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" @@ -87,3 +88,57 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, return ParseIR(File.take(), Err, Context); } + +// @LOCALMOD-BEGIN +// Note: Code below based on ParseIR and ParseIRFile in llvm/Support/IRReader.h +Module *llvm::NaClParseIR(MemoryBuffer *Buffer, + NaClFileFormat Format, + SMDiagnostic &Err, + LLVMContext &Context) { + if ((Format == PNaClFormat) && + isNaClBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + Module *M = NaClParseBitcodeFile(Buffer, Context, &ErrMsg); + if (M == 0) + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + ErrMsg); + // ParseBitcodeFile does not take ownership of the Buffer. + delete Buffer; + return M; + } else if (Format == LLVMFormat) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg); + if (M == 0) + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + ErrMsg); + // ParseBitcodeFile does not take ownership of the Buffer. + delete Buffer; + return M; + } + + return ParseAssembly(Buffer, 0, Err, Context); + } else { + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + "Did not specify correct format for file"); + return 0; + } +} + +Module *llvm::NaClParseIRFile(const std::string &Filename, + NaClFileFormat Format, + SMDiagnostic &Err, + LLVMContext &Context) { + OwningPtr<MemoryBuffer> File; + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { + Err = SMDiagnostic(Filename, SourceMgr::DK_Error, + "Could not open input file: " + ec.message()); + return 0; + } + + return NaClParseIR(File.take(), Format, Err, Context); +} + +// @LOCALMOD-END
\ No newline at end of file diff --git a/lib/IRReader/LLVMBuild.txt b/lib/IRReader/LLVMBuild.txt index b7bc74d616..2fea2e1dc4 100644 --- a/lib/IRReader/LLVMBuild.txt +++ b/lib/IRReader/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./lib/IRReader/LLVMBuild.txt -----------------------------*- Conf -*--===; +;===- ./lib/IRReader/LLVMBuild.txt -----------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -19,4 +19,4 @@ type = Library name = IRReader parent = Libraries -required_libraries = AsmParser BitReader Core Support +required_libraries = AsmParser BitReader NaClBitReader Core Support diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index d2e13c91c4..156b536353 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -945,7 +945,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { SmallVector<ReturnInst*, 8> Returns; // Ignore returns. CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap); } - + // There is no need to map the arguments anymore. for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); I != E; ++I) @@ -1169,7 +1169,20 @@ bool ModuleLinker::run() { DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+ SrcM->getModuleInlineAsm()); } - + // @LOCALMOD-BEGIN + // Update the destination module's dependent libraries list with the libraries + // from the source module. There's no opportunity for duplicates here as the + // Module ensures that duplicate insertions are discarded. + for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end(); + SI != SE; ++SI) + DstM->addLibrary(*SI); + + // If the source library's module id is in the dependent library list of the + // destination library, remove it since that module is now linked in. + StringRef ModuleId = SrcM->getModuleIdentifier(); + if (!ModuleId.empty()) + DstM->removeLibrary(sys::path::stem(ModuleId)); + // @LOCALMOD-END // Loop over all of the linked values to compute type mappings. computeTypeMapping(); diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index db882c020b..f2f78333b7 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMMC MCMachOStreamer.cpp MCMachObjectTargetWriter.cpp MCModule.cpp + MCNaCl.cpp MCNullStreamer.cpp MCObjectFileInfo.cpp MCObjectStreamer.cpp diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index fb5ab28bcf..8ae68af58f 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -18,6 +18,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" @@ -239,8 +240,16 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) BundleLockState(NotBundleLocked), BundleGroupBeforeFirstInst(false), HasInstructions(false) { - if (A) + // @LOCALMOD-BEGIN + if (A) { + // Necessary for IRT building because the IRT loader expects the end of + // the section to be bundle-aligned. Padding happens with 0's though, + // so it's not really ideal. TODO(dschuff) figure out how to do it right. 
A->getSectionList().push_back(this); + if (A->isBundlingEnabled() && _Section.UseCodeAlign()) + setAlignment(A->getBundleAlignSize()); + } + // @LOCALMOD-END } MCSectionData::iterator diff --git a/lib/MC/MCNaCl.cpp b/lib/MC/MCNaCl.cpp new file mode 100644 index 0000000000..4a6363d6c0 --- /dev/null +++ b/lib/MC/MCNaCl.cpp @@ -0,0 +1,74 @@ +//===- lib/MC/MCNaCl.cpp - NaCl-specific MC implementation ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCNaCl.h" +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ELF.h" + +static const char NoteNamespace[] = "NaCl"; + +namespace llvm { +void initializeNaClMCStreamer(MCStreamer &Streamer, MCContext &Ctx, + const Triple &TheTriple) { + assert(TheTriple.isOSNaCl()); + const char *NoteName; + const char *NoteArch; + unsigned BundleAlign; + switch (TheTriple.getArch()) { + case Triple::arm: + NoteName = ".note.NaCl.ABI.arm"; + NoteArch = "arm"; + BundleAlign = 4; + break; + case Triple::mipsel: + NoteName = ".note.NaCl.ABI.mipsel"; + NoteArch = "mipsel"; + BundleAlign = 4; + break; + case Triple::x86: + NoteName = ".note.NaCl.ABI.x86-32"; + NoteArch = "x86-32"; + BundleAlign = 5; + break; + case Triple::x86_64: + NoteName = ".note.NaCl.ABI.x86-64"; + NoteArch = "x86-64"; + BundleAlign = 5; + break; + default: + report_fatal_error("Unsupported architecture for NaCl"); + } + + // Set bundle-alignment as required by the NaCl ABI for the target. + Streamer.EmitBundleAlignMode(BundleAlign); + + // Emit an ELF Note section in its own COMDAT group which identifies NaCl + // object files to the gold linker, so it can use the NaCl layout. + const MCSection *Note = Ctx.getELFSection( + NoteName, ELF::SHT_NOTE, ELF::SHF_ALLOC | ELF::SHF_GROUP, + SectionKind::getReadOnly(), 0, NoteName); + + // TODO(dschuff) This should probably use PushSection and PopSection, but + // PopSection will assert if there haven't been any other sections switched to + // yet. + Streamer.SwitchSection(Note); + Streamer.EmitIntValue(strlen(NoteNamespace) + 1, 4); + Streamer.EmitIntValue(strlen(NoteArch) + 1, 4); + Streamer.EmitIntValue(ELF::NT_VERSION, 4); + Streamer.EmitBytes(NoteNamespace); + Streamer.EmitIntValue(0, 1); // NUL terminator + Streamer.EmitValueToAlignment(4); + Streamer.EmitBytes(NoteArch); + Streamer.EmitIntValue(0, 1); // NUL terminator + Streamer.EmitValueToAlignment(4); +} +} // namespace llvm diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index d21ce8d1a1..5bb5ec111e 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -189,6 +189,13 @@ void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { } void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { + // @LOCALMOD-BEGIN + if (getAssembler().isBundlingEnabled() && + getAssembler().getBackend().CustomExpandInst(Inst, *this)) { + return; + } + // @LOCALMOD-END + // Scan for values. for (unsigned i = Inst.getNumOperands(); i--; ) if (Inst.getOperand(i).isExpr()) diff --git a/lib/Makefile b/lib/Makefile index 57f016bc89..ac9050db7c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -10,9 +10,13 @@ LEVEL = .. 
include $(LEVEL)/Makefile.config -PARALLEL_DIRS := IR AsmParser Bitcode Archive Analysis Transforms CodeGen \ - Target ExecutionEngine Linker MC Object Option DebugInfo \ - IRReader +PARALLEL_DIRS := IR IRReader AsmParser Bitcode Archive Analysis Transforms CodeGen \ + Target ExecutionEngine Linker MC Object Option Wrap DebugInfo + +ifeq ($(NACL_SANDBOX),1) + PARALLEL_DIRS := $(filter-out Archive Linker, \ + $(PARALLEL_DIRS)) +endif include $(LEVEL)/Makefile.common diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index 182c362cc7..411b9c20bf 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -267,6 +267,7 @@ void CrashRecoveryContext::Enable() { gCrashRecoveryEnabled = true; +#if !defined(__native_client__) // Setup the signal handler. struct sigaction Handler; Handler.sa_handler = CrashRecoverySignalHandler; @@ -276,6 +277,9 @@ void CrashRecoveryContext::Enable() { for (unsigned i = 0; i != NumSignals; ++i) { sigaction(Signals[i], &Handler, &PrevActions[i]); } +#else +#warning Cannot setup the signal handler on this machine +#endif } void CrashRecoveryContext::Disable() { @@ -286,9 +290,11 @@ void CrashRecoveryContext::Disable() { gCrashRecoveryEnabled = false; +#if !defined(__native_client__) // Restore the previous signal handlers. for (unsigned i = 0; i != NumSignals; ++i) sigaction(Signals[i], &PrevActions[i], 0); +#endif } #endif diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index f14cb45d9d..153014f790 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -187,3 +187,4 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { } #endif // LLVM_ON_WIN32 + diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 2917e273bc..819d25546b 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -19,7 +19,7 @@ #include <unistd.h> #endif using namespace llvm; - +#ifndef __native_client__ /// \brief Attempt to read the lock file with the given name, if it exists. /// /// \param LockFileName The name of the lock file to read. @@ -251,3 +251,5 @@ void LockFileManager::waitForUnlock() { // Give up. } + +#endif diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 7c5ab96a76..7a8b0ecd3d 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -290,7 +290,7 @@ error_code MemoryBuffer::getFile(const char *Filename, static bool shouldUseMmap(int FD, size_t FileSize, size_t MapSize, - off_t Offset, + int64_t Offset, // @LOCALMOD (?) 
bool RequiresNullTerminator, int PageSize) { // We don't use mmap for small files because this can severely fragment our diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp index 4e4a026b2f..586392fc1e 100644 --- a/lib/Support/Mutex.cpp +++ b/lib/Support/Mutex.cpp @@ -60,7 +60,7 @@ MutexImpl::MutexImpl( bool recursive) assert(errorcode == 0); #if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \ - !defined(__DragonFly__) && !defined(__Bitrig__) + !defined(__DragonFly__) && !defined(__Bitrig__) && !defined(__native_client__) // Make it a process local mutex errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); assert(errorcode == 0); diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index 726e2fbcf0..aa06763258 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -19,7 +19,9 @@ #include "llvm/Config/config.h" #include "llvm/ADT/StringRef.h" #include "Unix.h" +#if !defined(__native_client__) #include <sys/utsname.h> +#endif // (__native_client__) #include <cctype> #include <string> #include <cstdlib> // ::getenv @@ -27,12 +29,16 @@ using namespace llvm; static std::string getOSVersion() { +#if !defined(__native_client__) struct utsname info; if (uname(&info)) return ""; return info.release; +#else // (__native_client__) + return ""; +#endif // (__native_client__) } std::string sys::getDefaultTargetTriple() { diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 72a8af621d..6be3ee045f 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -156,8 +156,12 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { return error_code(EINVAL, generic_category()); int Protect = getPosixProtectionFlags(Flags); - +#ifndef __native_client__ int Result = ::mprotect(M.Address, M.Size, Protect); +#else + int Result = -1; + llvm_unreachable("Native client does not support mprotect"); +#endif if (Result != 0) return error_code(errno, system_category()); diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 6a5ebb8cd9..b82371a7b6 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -133,7 +133,9 @@ Path::GetRootDirectory() { Path Path::GetTemporaryDirectory(std::string *ErrMsg) { -#if defined(HAVE_MKDTEMP) +#if defined(__native_client__) + return Path(""); +#elif defined(HAVE_MKDTEMP) // The best way is with mkdtemp but that's not available on many systems, // Linux and FreeBSD have it. Others probably won't. char pathname[] = "/tmp/llvm_XXXXXX"; @@ -251,6 +253,7 @@ Path::GetUserHomeDirectory() { Path Path::GetCurrentDirectory() { +#if !defined(__native_client__) char pathname[MAXPATHLEN]; if (!getcwd(pathname, MAXPATHLEN)) { assert(false && "Could not query current working directory."); @@ -258,6 +261,9 @@ Path::GetCurrentDirectory() { } return Path(pathname); +#else // (__native_client__) + return Path("./"); +#endif // (__native_client__) } #if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ @@ -319,7 +325,9 @@ getprogpath(char ret[PATH_MAX], const char *bin) /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { -#if defined(__APPLE__) +#if defined(__native_client__) + return Path(std::string("./") + std::string(argv0)); +#elif defined(__APPLE__) // On OS X the executable path is saved to the stack by dyld. 
Reading it // from there is much faster than calling dladdr, especially for large // binaries with symbols. @@ -420,7 +428,11 @@ bool Path::getMagicNumber(std::string &Magic, unsigned len) const { bool Path::exists() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), F_OK ); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool @@ -433,21 +445,33 @@ Path::isDirectory() const { bool Path::isSymLink() const { +#if defined(__native_client__) + return false; +#else struct stat buf; if (0 != lstat(path.c_str(), &buf)) return false; return S_ISLNK(buf.st_mode); +#endif } bool Path::canRead() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), R_OK); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool Path::canWrite() const { +#if !defined(__native_client__) return 0 == access(path.c_str(), W_OK); +#else // (__native_client__) + return true; +#endif // (__native_client__) } bool @@ -466,6 +490,7 @@ Path::isRegularFile() const { bool Path::canExecute() const { +#if !defined(__native_client__) if (0 != access(path.c_str(), R_OK | X_OK )) return false; struct stat buf; @@ -473,6 +498,7 @@ Path::canExecute() const { return false; if (!S_ISREG(buf.st_mode)) return false; +#endif // (__native_client__) return true; } @@ -520,6 +546,7 @@ PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { } static bool AddPermissionBits(const Path &File, int bits) { +#if !defined(__native_client__) // Get the umask value from the operating system. We want to use it // when changing the file's permissions. Since calling umask() sets // the umask and returns its old value, we must call it a second @@ -535,6 +562,7 @@ static bool AddPermissionBits(const Path &File, int bits) { // that the umask would not disable. if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1) return false; +#endif // (__native_client__) return true; } @@ -558,6 +586,7 @@ bool Path::makeExecutableOnDisk(std::string* ErrMsg) { bool Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { +#if !defined(__native_client__) DIR* direntries = ::opendir(path.c_str()); if (direntries == 0) return MakeErrMsg(ErrMsg, path + ": can't open directory"); @@ -583,6 +612,7 @@ Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { } closedir(direntries); +#endif return false; } @@ -635,7 +665,7 @@ Path::eraseSuffix() { } static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { - +#if !defined(__native_client__) if (access(beg, R_OK | W_OK) == 0) return false; @@ -660,6 +690,9 @@ static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { } return mkdir(beg, S_IRWXU | S_IRWXG) != 0; +#else // (__native_client__) + return false; +#endif // (__native_client__) } bool @@ -683,11 +716,13 @@ Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) { bool Path::createFileOnDisk(std::string* ErrMsg) { +#if !defined(__native_client__) // Create the file int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR); if (fd < 0) return MakeErrMsg(ErrMsg, path + ": can't create file"); ::close(fd); +#endif // (__native_client__) return false; } @@ -707,6 +742,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { bool Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { +#if !defined(__native_client__) // Get the status so we can determine if it's a file or directory. 
struct stat buf; if (0 != stat(path.c_str(), &buf)) { @@ -751,18 +787,26 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { if (rmdir(pathname.c_str()) != 0) return MakeErrMsg(ErrStr, pathname + ": can't erase directory"); return false; +#else // (__native_client__) + MakeErrMsg(ErrStr, ": PNACL does not know how to erase directories!"); + return false; +#endif // (__native_client__) + } bool Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { +#if !defined(__native_client__) if (0 != ::rename(path.c_str(), newName.c_str())) return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" + newName.str() + "'"); +#endif return false; } bool Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { +#if !defined(__native_client__) struct utimbuf utb; utb.actime = si.modTime.toPosixTime(); utb.modtime = utb.actime; @@ -770,6 +814,7 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { return MakeErrMsg(ErrStr, path + ": can't set file modification time"); if (0 != ::chmod(path.c_str(),si.mode)) return MakeErrMsg(ErrStr, path + ": can't set mode"); +#endif // (__native_client__) return false; } diff --git a/lib/Support/Unix/PathV2.inc b/lib/Support/Unix/PathV2.inc index 7e0aead151..f426fbf7fc 100644 --- a/lib/Support/Unix/PathV2.inc +++ b/lib/Support/Unix/PathV2.inc @@ -117,7 +117,9 @@ error_code current_path(SmallVectorImpl<char> &result) { // For GNU Hurd result.reserve(1024); #endif - +#ifdef __native_client__ + llvm_unreachable("current_path() not implemented for Native Client"); +#else while (true) { if (::getcwd(result.data(), result.capacity()) == 0) { // See if there was a real error. @@ -130,6 +132,7 @@ error_code current_path(SmallVectorImpl<char> &result) { } result.set_size(strlen(result.data())); +#endif return error_code::success(); } @@ -193,6 +196,9 @@ error_code copy_file(const Twine &from, const Twine &to, copy_option copt) { } error_code create_directory(const Twine &path, bool &existed) { +#ifdef __native_client__ + llvm_unreachable("create_directory() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -204,9 +210,13 @@ error_code create_directory(const Twine &path, bool &existed) { existed = false; return error_code::success(); +#endif } error_code create_hard_link(const Twine &to, const Twine &from) { +#ifdef __native_client__ + llvm_unreachable("create_hard_link() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -217,9 +227,13 @@ error_code create_hard_link(const Twine &to, const Twine &from) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code create_symlink(const Twine &to, const Twine &from) { +#ifdef __native_client__ + llvm_unreachable("create_symlink() not implemented for Native Client"); +#else // Get arguments. 
SmallString<128> from_storage; SmallString<128> to_storage; @@ -230,9 +244,13 @@ error_code create_symlink(const Twine &to, const Twine &from) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code remove(const Twine &path, bool &existed) { +#ifdef __native_client__ + llvm_unreachable("remove() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -244,9 +262,13 @@ error_code remove(const Twine &path, bool &existed) { existed = true; return error_code::success(); +#endif } error_code rename(const Twine &from, const Twine &to) { +#ifdef __native_client__ + llvm_unreachable("rename() not implemented for Native Client"); +#else // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -266,9 +288,13 @@ error_code rename(const Twine &from, const Twine &to) { } return error_code::success(); +#endif } error_code resize_file(const Twine &path, uint64_t size) { +#ifdef __native_client__ + llvm_unreachable("resize_file() not implemented for Native Client"); +#else SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); @@ -276,6 +302,7 @@ error_code resize_file(const Twine &path, uint64_t size) { return error_code(errno, system_category()); return error_code::success(); +#endif } error_code exists(const Twine &path, bool &result) { @@ -474,6 +501,10 @@ rety_open_create: } error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { +#ifdef __native_client__ + // Newlib does not have ftruncate. + llvm_unreachable("mapped_file_region not implemented for native client"); +#else AutoFD ScopedFD(FD); if (!CloseFD) ScopedFD.take(); @@ -501,6 +532,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { if (Mapping == MAP_FAILED) return error_code(errno, system_category()); return error_code::success(); +#endif // __native_client__ } mapped_file_region::mapped_file_region(const Twine &path, @@ -511,6 +543,9 @@ mapped_file_region::mapped_file_region(const Twine &path, : Mode(mode) , Size(length) , Mapping() { +#ifdef __native_client__ + llvm_unreachable("mapped_file_region not implemented for native client"); +#endif // Make sure that the requested size fits within SIZE_T. if (length > std::numeric_limits<size_t>::max()) { ec = make_error_code(errc::invalid_argument); @@ -540,6 +575,9 @@ mapped_file_region::mapped_file_region(int fd, : Mode(mode) , Size(length) , Mapping() { +#ifdef __native_client__ + llvm_unreachable("mapped_file_region not implemented for native client"); +#endif // Make sure that the requested size fits within SIZE_T. if (length > std::numeric_limits<size_t>::max()) { ec = make_error_code(errc::invalid_argument); diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 9a4454f1c6..02c8690ffc 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -36,6 +36,8 @@ # include <termios.h> #endif +#include <sys/unistd.h> + //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only generic UNIX code that //=== is guaranteed to work on *all* UNIX variants. 
@@ -94,9 +96,10 @@ static unsigned getPageSize() { const int page_size = 0x1000; #elif defined(HAVE_GETPAGESIZE) const int page_size = ::getpagesize(); -#elif defined(HAVE_SYSCONF) +#elif defined(HAVE_SYSCONF) && !defined(__native_client__) long page_size = ::sysconf(_SC_PAGE_SIZE); #else + const int page_size = 0; #warning Cannot get the page size on this machine #endif return static_cast<unsigned>(page_size); @@ -139,11 +142,23 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time, } int Process::GetCurrentUserId() { +#if !defined(__native_client__) return getuid(); +#else // (__native_client__) +// TODO(abetul): What should the proper return value be for this function? +// What about having a reserved user_id or the user "nobody" for PNACL? + return -1; +#endif // (__native_client__) } int Process::GetCurrentGroupId() { +#if !defined(__native_client__) return getgid(); +#else // (__native_client__) +// TODO(abetul): What should the proper return value be for this function? +// What about having a reserved/unused group_id? + return -1; +#endif // (__native_client__) } #if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index aa03d48438..c6cc698e9b 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -102,6 +102,10 @@ Program::FindProgramByName(const std::string& progName) { } static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { +#if defined(__native_client__) + MakeErrMsg(ErrMsg, "Cannot redirect I/O in NaCl"); + return true; +#else // (__native_client__) if (Path == 0) // Noop return false; const char *File; @@ -118,7 +122,6 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { + (FD == 0 ? "input" : "output")); return true; } - // Install it as the requested FD if (dup2(InFD, FD) == -1) { MakeErrMsg(ErrMsg, "Cannot dup2"); @@ -127,6 +130,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { } close(InFD); // Close the original FD return false; +#endif // (__native_client__) } #ifdef HAVE_POSIX_SPAWN @@ -236,6 +240,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, } #endif +#if !defined(__native_client__) // Create a child process. int child = fork(); switch (child) { @@ -296,6 +301,10 @@ Program::Execute(const Path &path, const char **args, const char **envp, Data_ = reinterpret_cast<void*>(child); return true; +#else // (__native_client__) + MakeErrMsg(ErrMsg, "PNACL does not know how to execute child processes!"); + return false; +#endif // (__native_client__) } int @@ -303,6 +312,7 @@ Program::Wait(const sys::Path &path, unsigned secondsToWait, std::string* ErrMsg) { +#if !defined(__native_client__) #ifdef HAVE_SYS_WAIT_H struct sigaction Act, Old; @@ -395,6 +405,18 @@ Program::Wait(const sys::Path &path, *ErrMsg = "Program::Wait is not implemented on this platform yet!"; return -1; #endif +#else // (__native_client__) +// TODO(abetul): What should the proper return value be here?
+ MakeErrMsg(ErrMsg, "PNACL does not know how to wait for a child process!"); + return -1; +#endif // (__native_client__) +#if !defined(__native_client__) + +#else // (__native_client__) + MakeErrMsg(ErrMsg, "PNACL does not know how to kill processes!"); + return true; +#endif // (__native_client__) + } error_code Program::ChangeStdinToBinary(){ diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 64d1fc1c08..cdb1be900b 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -86,6 +86,7 @@ static struct { static void RegisterHandler(int Signal) { +#if !defined(__native_client__) assert(NumRegisteredSignals < sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) && "Out of space for signal handlers!"); @@ -101,6 +102,7 @@ static void RegisterHandler(int Signal) { &RegisteredSignalInfo[NumRegisteredSignals].SA); RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal; ++NumRegisteredSignals; +#endif // (__native_client__) } static void RegisterHandlers() { @@ -112,11 +114,13 @@ static void RegisterHandlers() { } static void UnregisterHandlers() { +#if !defined(__native_client__) // Restore all of the signal handlers to how they were before we showed up. for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i) sigaction(RegisteredSignalInfo[i].SigNo, &RegisteredSignalInfo[i].SA, 0); NumRegisteredSignals = 0; +#endif // (__native_client__) } @@ -159,10 +163,12 @@ static RETSIGTYPE SignalHandler(int Sig) { // instead of recursing in the signal handler. UnregisterHandlers(); +#if !defined(__native_client__) // Unmask all potentially blocked kill signals. sigset_t SigMask; sigfillset(&SigMask); sigprocmask(SIG_UNBLOCK, &SigMask, 0); +#endif SignalsMutex.acquire(); RemoveFilesToRemove(); diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc index df8558bf8b..0d7dd90bd1 100644 --- a/lib/Support/Unix/TimeValue.inc +++ b/lib/Support/Unix/TimeValue.inc @@ -18,6 +18,13 @@ #include "Unix.h" +// @LOCALMOD-START +#ifndef timerclear +// Newlib does not have the timer{clear,add,sub} macros +#define timerclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) +#endif +// @LOCALMOD-END + namespace llvm { using namespace sys; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 80e5f37eb0..6d49d33e6d 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -20,6 +20,9 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Target/TargetMachine.h" +// @LOCALMOD (for LowerARMMachineInstrToMCInstPCRel) +#include "llvm/MC/MCSymbol.h" + namespace llvm { class ARMAsmPrinter; @@ -45,12 +48,31 @@ FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +/* @LOCALMOD-START */ +FunctionPass *createARMNaClRewritePass(); +/* @LOCALMOD-END */ + /// \brief Creates an ARM-specific Target Transformation Info pass. ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM); + void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP); + +/* @LOCALMOD-START */ +// Used to lower the pc-relative MOVi16PIC / MOVTi16PIC pseudo instructions +// into the real MOVi16 / MOVTi16 instructions. +// See comment on MOVi16PIC for more details. 
+void LowerARMMachineInstrToMCInstPCRel(const MachineInstr *MI, + MCInst &OutMI, + ARMAsmPrinter &AP, + unsigned ImmIndex, + unsigned PCIndex, + MCSymbol *PCLabel, + unsigned PCAdjustment); +/* @LOCALMOD-END */ + } // end namespace llvm; #endif diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 13ec208793..35103a321e 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -40,6 +40,7 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" +#include "llvm/MC/MCNaCl.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" @@ -55,6 +56,13 @@ #include <cctype> using namespace llvm; +// @LOCALMOD-START +namespace llvm { + extern cl::opt<bool> FlagSfiBranch; + extern cl::opt<bool> FlagSfiData; +} +// @LOCALMOD-END + namespace { // Per section and per symbol attributes are not supported. @@ -226,6 +234,75 @@ getDebugValueLocation(const MachineInstr *MI) const { return Location; } +// @LOCALMOD-START +// Make sure all jump targets are aligned and also all constant pools +void NaclAlignAllJumpTargetsAndConstantPools(MachineFunction &MF) { + // JUMP TABLE TARGETS + MachineJumpTableInfo *jt_info = MF.getJumpTableInfo(); + if (jt_info) { + const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables(); + for (unsigned i=0; i < JT.size(); ++i) { + std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs; + + for (unsigned j=0; j < MBBs.size(); ++j) { + if (MBBs[j]->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { + continue; + } + MBBs[j]->setAlignment(4); + } + } + } + + // FIRST ENTRY IN A ConstantPool + bool last_bb_was_constant_pool = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + if (I->isLandingPad()) { + I->setAlignment(4); + } + + if (I->empty()) continue; + + bool is_constant_pool = I->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY; + + if (last_bb_was_constant_pool != is_constant_pool) { + I->setAlignment(4); + } + + last_bb_was_constant_pool = is_constant_pool; + } +} + +bool ARMAsmPrinter::UseReadOnlyJumpTables() const { + if (Subtarget->isTargetNaCl()) + return true; + return false; +} + +unsigned ARMAsmPrinter::GetTargetBasicBlockAlign() const { + if (Subtarget->isTargetNaCl()) + return 4; + return 0; +} + +unsigned ARMAsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 4; + } + } + return 0; +} +// @LOCALMOD-END + /// EmitDwarfRegOp - Emit dwarf register operation. void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); @@ -302,6 +379,18 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitThumbFunc(CurrentFnSym); } + // @LOCALMOD-START + // make sure function entry is aligned. We use XmagicX as our basis + // for alignment decisions (c.f. 
assembler sfi macros) + if (Subtarget->isTargetNaCl()) { + EmitAlignment(std::max(MF->getAlignment(), 4u)); + + if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(StringRef("\t.set XmagicX, .\n")); + } + } + // @LOCALMOD-END + OutStreamer.EmitLabel(CurrentFnSym); } @@ -328,6 +417,11 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); MCP = MF.getConstantPool(); + // @LOCALMOD-START + if (FlagSfiBranch) { + NaclAlignAllJumpTargetsAndConstantPools(MF); + } + // @LOCALMOD-END return AsmPrinter::runOnMachineFunction(MF); } @@ -368,10 +462,10 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_GlobalAddress: { const GlobalValue *GV = MO.getGlobal(); if ((Modifier && strcmp(Modifier, "lo16") == 0) || - (TF & ARMII::MO_LO16)) + (TF == ARMII::MO_LO16)) // @LOCALMOD: TEMPORARY FIX O << ":lower16:"; else if ((Modifier && strcmp(Modifier, "hi16") == 0) || - (TF & ARMII::MO_HI16)) + (TF == ARMII::MO_HI16)) // @LOCALMOD: TEMPORARY FIX O << ":upper16:"; O << *Mang->getSymbol(GV); @@ -397,6 +491,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, //===--------------------------------------------------------------------===// + MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { SmallString<60> Name; @@ -576,6 +671,8 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +void EmitSFIHeaders(raw_ostream &O); + void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { Reloc::Model RelocM = TM.getRelocationModel(); @@ -635,8 +732,20 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Emit ARM Build Attributes if (Subtarget->isTargetELF()) emitAttributes(); -} + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + if (OutStreamer.hasRawTextSupport()) { + std::string str; + raw_string_ostream OS(str); + EmitSFIHeaders(OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + initializeNaClMCStreamer(OutStreamer, OutContext, + Subtarget->getTargetTriple()); + } + // @LOCALMOD-END +} void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { @@ -711,6 +820,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } + //===----------------------------------------------------------------------===// // Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() // FIXME: @@ -976,7 +1086,20 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext); } Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext); + } else { // @LOCALMOD-BEGIN + // Check mustAddCurrentAddress() when getPCAdjustment() == 0, + // and make it actually *Subtract* the current address. + // A more appropriate name is probably "relativeToCurrentAddress", + // since the assembler can't actually handle "X + .", only "X - .". 
+ if (ACPV->mustAddCurrentAddress()) { + MCSymbol *DotSym = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(DotSym); + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + Expr = MCBinaryExpr::CreateSub(Expr, DotExpr, OutContext); + } } + // @LOCALMOD-END + OutStreamer.EmitValue(Expr, Size); } @@ -1561,6 +1684,28 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { InConstantPool = true; } + + // @LOCALMOD-START + // NOTE: we also should make sure that the first data item + // is not in a code bundle + // NOTE: there may be issues with alignment constraints + if (Subtarget->isTargetNaCl() && OutStreamer.hasRawTextSupport()) { + const unsigned size = MI->getOperand(2).getImm(); + //assert(size == 4 || size == 8 && "Unsupported data item size"); + if (size == 8) { + // we cannot generate a size 8 constant at offset 12 (mod 16) + OutStreamer.EmitRawText(StringRef("sfi_nop_if_at_bundle_end\n")); + } + + if (FlagSfiData) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "sfi_illegal_if_at_bundle_begining @ ========== SFI (" << + size << ")\n"; + OutStreamer.EmitRawText(OS.str()); + } + } + // @LOCALMOD-END OutStreamer.EmitLabel(GetCPISymbol(LabelId)); const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; @@ -1926,6 +2071,50 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0)); return; } + + // @LOCALMOD-BEGIN + // These are pseudo ops for MOVW / MOVT with operands relative to a PC label. + // See the comments on MOVi16PIC in the .td file for more details. + case ARM::MOVi16PIC: { + MCInst TmpInst; + // First, build an instruction w/ the real opcode. + TmpInst.setOpcode(ARM::MOVi16); + + unsigned ImmIndex = 1; + unsigned PIC_id_index = 2; + unsigned PCAdjustment = 8; + // NOTE: if getPICLabel was a method of "this", or otherwise in scope for + // LowerARMMachineInstrToMCInstPCRel, then we wouldn't need to create + // it here (as well as below). + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(PIC_id_index).getImm(), + OutContext); + LowerARMMachineInstrToMCInstPCRel(MI, TmpInst, *this, ImmIndex, + PIC_id_index, PCLabel, PCAdjustment); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVTi16PIC: { + MCInst TmpInst; + // First, build an instruction w/ the real opcode. + TmpInst.setOpcode(ARM::MOVTi16); + + unsigned ImmIndex = 2; + unsigned PIC_id_index = 3; + unsigned PCAdjustment = 8; + + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(PIC_id_index).getImm(), + OutContext); + + LowerARMMachineInstrToMCInstPCRel(MI, TmpInst, *this, ImmIndex, + PIC_id_index, PCLabel, PCAdjustment); + OutStreamer.EmitInstruction(TmpInst); + return; + } + //@LOCALMOD-END } MCInst TmpInst; diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index c945e4f286..30418a0ab3 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -68,9 +68,16 @@ public: virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; virtual bool runOnMachineFunction(MachineFunction &F) LLVM_OVERRIDE; + // @LOCALMOD-START + // usually this does nothing on ARM as constants pools + // are handled with custom code. + // For the sfi case we do not use the custom logic and fall back + // to the default implementation. virtual void EmitConstantPool() LLVM_OVERRIDE { - // we emit constant pools customly! 
+ if (FlagSfiDisableCP) AsmPrinter::EmitConstantPool(); } + // @LOCALMOD-END + virtual void EmitFunctionBodyEnd() LLVM_OVERRIDE; virtual void EmitFunctionEntryLabel() LLVM_OVERRIDE; virtual void EmitStartOfAsmFile(Module &M) LLVM_OVERRIDE; @@ -79,6 +86,17 @@ public: // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); + + // @LOCALMOD-START + /// UseReadOnlyJumpTables - true if JumpTableInfo must be in rodata. + virtual bool UseReadOnlyJumpTables() const; + /// GetTargetBasicBlockAlign - Get the target alignment for basic blocks. + virtual unsigned GetTargetBasicBlockAlign() const; + /// GetTargetLabelAlign - Get optional alignment for TargetOpcode + /// labels E.g., EH_LABEL. + /// TODO(sehr,robertm): remove this if the labeled block has address taken. + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const; + // @LOCALMOD-END private: // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile() diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6005054271..d5f73b1a54 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -746,6 +746,9 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. Mov->addRegisterDefined(DestReg, TRI); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b0d34a76b0..6c4bab4726 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -17,6 +17,7 @@ #include "ARMFrameLowering.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" +#include "ARMTargetMachine.h" // @LOCALMOD #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" @@ -50,8 +51,10 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, BasePtr(ARM::R6) { } +extern cl::opt<bool> ReserveR9; // @LOCALMOD const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + if (ReserveR9) return CSR_NaCl_SaveList; // @LOCALMOD bool ghcCall = false; if (MF) { @@ -70,6 +73,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { + if (ReserveR9) return CSR_NaCl_RegMask; // @LOCALMOD return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } @@ -374,6 +378,13 @@ emitLoadConstPool(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + // @LOCALMOD-START + // In the sfi case we do not want to use the load const pseudo instr. + // Sadly, the ARM backend is not very consistent about using this + // pseudo instr. and hence checking this is not sufficient. + // But, it should help detect some regressions early. 
+ assert(!FlagSfiDisableCP && "unexpected call to emitLoadConstPool"); + // @LOCALMOD-END MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 8ff666ed28..b051d88534 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -105,6 +105,10 @@ def CC_ARM_APCS_GHC : CallingConv<[ def CC_ARM_AAPCS_Common : CallingConv<[ + // @LOCALMOD-BEGIN (PR11018) + CCIfByVal<CCPassByVal<4, 4>>, + // @LOCALMOD-END + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, // i64/f64 is passed in even pairs of GPRs @@ -214,3 +218,9 @@ def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, // add is a workaround for not being able to compile empty list: // def CSR_GHC : CalleeSavedRegs<()>; def CSR_GHC : CalleeSavedRegs<(add)>; + +// @LOCALMOD-START +// NaCl does not save R9, but otherwise uses the same order as AAPCS +def CSR_NaCl : CalleeSavedRegs<(add LR, R11, R10, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8))>; +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 4891609b33..d4aaf97a14 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -374,6 +374,7 @@ FunctionPass *llvm::createARMConstantIslandPass() { } bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { + if (FlagSfiDisableCP) return false; // @LOCALMOD MF = &mf; MCP = mf.getConstantPool(); diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 93812fe6bb..b6e0fe7c7c 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -81,6 +81,9 @@ public: bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; } bool isLSDA() const { return Kind == ARMCP::CPLSDA; } bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; } + // @LOCALMOD-START + bool isValue() const { return Kind == ARMCP::CPValue; } + // @LOCALMOD-END virtual unsigned getRelocationInfo() const { return 2; } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index beb843ca9a..0fa3fe9bc8 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetOptions.h" // @LOCALMOD for llvm::TLSUseCall #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! 
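Taken together, the emitLoadConstPool assert and the ARMConstantIslands early return above mean that under FlagSfiDisableCP a 32-bit constant is no longer loaded from an inline pool. An illustrative sketch of the difference (not from the patch):

    // Default ARM codegen:              With FlagSfiDisableCP:
    //   ldr  r0, .LCPI0_0                 movw r0, #0x5678
    //   ...                               movt r0, #0x1234
    // .LCPI0_0:
    //   .long 0x12345678
    //
    // The pool entry embedded in .text (and the pc-relative ldr reaching it)
    // disappears, which is why the constant-island placement pass has
    // nothing left to do.
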
#include "llvm/Target/TargetFrameLowering.h" @@ -43,6 +44,7 @@ namespace { const TargetRegisterInfo *TRI; const ARMSubtarget *STI; ARMFunctionInfo *AFI; + bool IsRelocPIC; // @LOCALMOD virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -63,6 +65,16 @@ namespace { unsigned Opc, bool IsExt); void ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI); + // @LOCALMOD-BEGIN + void AddPICADD_MOVi16_PICID(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool NotThumb, + unsigned PredReg, ARMCC::CondCodes Pred, + unsigned DstReg, bool DstIsDead, + MachineInstrBuilder &LO16, + MachineInstrBuilder &HI16); + // @LOCALMOD-END }; char ARMExpandPseudo::ID = 0; } @@ -478,13 +490,46 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); - // Transfer memoperands. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - MI.eraseFromParent(); } +// @LOCALMOD-BEGIN +// AddPICADD_MOVi16_PICID - Inserts a PICADD into the given basic block, +// and adds the PC label ID (of the PICADD) as an operand of the LO16 / HI16 +// MOVs. The ID operand will follow the "Immediate" operand (assumes that +// operand is already added). +void ARMExpandPseudo::AddPICADD_MOVi16_PICID(MachineInstr &MI, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + bool NotThumb, + unsigned PredReg, ARMCC::CondCodes Pred, + unsigned DstReg, bool DstIsDead, + MachineInstrBuilder &LO16, + MachineInstrBuilder &HI16) { + // Throw in a PICADD, and tack on the PC label ID to the MOVT/MOVWs + MachineFunction &MF = *MI.getParent()->getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + // Make a unique ID for this PC by pulling from pool of constPoolIDs + unsigned PC_ID = AFI->createPICLabelUId(); + MachineInstrBuilder PicADD = + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NotThumb ? ARM::PICADD : ARM::tPICADD)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addImm(PC_ID) + .addImm(Pred) + .addReg(PredReg); + (void)PicADD; // squelch unused warning. + + // Add the PC label ID after what would have been an absolute address. + LO16 = LO16.addImm(PC_ID); + HI16 = HI16.addImm(PC_ID); +} +// @LOCALMOD-END + /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ /// register operands to real instructions with D register operands. void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { @@ -645,7 +690,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned LO16Opc = 0; unsigned HI16Opc = 0; - if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + // @LOCALMOD + bool isThumb2 = (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm); + if (isThumb2) { LO16Opc = ARM::t2MOVi16; HI16Opc = ARM::t2MOVTi16; } else { @@ -653,10 +700,28 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16Opc = ARM::MOVTi16; } + // @LOCALMOD-BEGIN + // If constant pools are "disabled" (actually, moved to rodata), then + // many addresses (e.g., the addresses of what used to be the "pools") + // may not be materialized in a pc-relative manner, because MOVT / MOVW + // are used to materialize the addresses. + // We need to know if it matters that references are pc-relative + // (e.g., to be PIC). + // See the comments on MOVi16PIC / MOVTi16PIC for more details. 
+ const bool ShouldUseMOV16PIC = FlagSfiDisableCP && IsRelocPIC && + (MO.isCPI() || MO.isJTI() || MO.isGlobal()); // TODO check this list. + if (ShouldUseMOV16PIC) { + if (isThumb2) + llvm_unreachable("FIXME: add PIC versions of t2MOVi16"); + LO16Opc = ARM::MOVi16PIC; + HI16Opc = ARM::MOVTi16PIC; + } + // @LOCALMOD-END + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(DstReg); + .addReg(DstReg, RegState::Kill); // @LOCALMOD if (MO.isImm()) { unsigned Imm = MO.getImm(); @@ -664,13 +729,31 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned Hi16 = (Imm >> 16) & 0xffff; LO16 = LO16.addImm(Lo16); HI16 = HI16.addImm(Hi16); - } else { + } else if (MO.isGlobal()) { // @LOCALMOD const GlobalValue *GV = MO.getGlobal(); unsigned TF = MO.getTargetFlags(); LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + // @LOCALMOD-START - support for jumptable addresses and CPI + } else if (MO.isCPI()) { + int i = MO.getIndex(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addConstantPoolIndex(i, MO.getOffset(), TF|ARMII::MO_LO16); + HI16 = HI16.addConstantPoolIndex(i, MO.getOffset(), TF|ARMII::MO_HI16); + } else if (MO.isJTI()){ + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addJumpTableIndex(MO.getIndex(), TF | ARMII::MO_LO16); + HI16 = HI16.addJumpTableIndex(MO.getIndex(), TF | ARMII::MO_HI16); + } else { + assert (0 && "unexpected operand"); + // @LOCALMOD-END } - + // @LOCALMOD-BEGIN + if (ShouldUseMOV16PIC) { + AddPICADD_MOVi16_PICID(MI, MBB, MBBI, !isThumb2, + PredReg, Pred, DstReg, DstIsDead, LO16, HI16); + } + // @LOCALMOD-END LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); @@ -848,13 +931,37 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::tTPsoft: case ARM::TPsoft: { + // @LOCALMOD-BEGIN + if (!STI->isTargetNaCl() || llvm::TLSUseCall) { + // Don't add implicit uses/defs for this call, otherwise + // liveness analysis passes get confused. MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), + BuildMI_NoImp(MBB, MBBI, MI.getDebugLoc(), // @LOCALMOD TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - TransferImpOps(MI, MIB, MIB); + TransferImpOps(MI, MIB, MIB); + } else { + // Inline version for native client. + // See native_client/src/untrusted/nacl/aeabi_read_tp.S + // .nexe builds use this version, while irt builds use a call to + // __aeabi_read_tp. + if (FlagNaClUseM23ArmAbi) { + // mov r0, r9 + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVr), ARM::R0) + .addReg(ARM::R9)) + .addReg(0); // Doesn't use/modify CPSR. 
+ } else { + // ldr r0, [r9, #0] + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::LDRi12), ARM::R0) + .addReg(ARM::R9) + .addImm(0)); + } + } + // @LOCALMOD-END MI.eraseFromParent(); return true; } @@ -1210,6 +1317,62 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; + + // @LOCALMOD-BEGIN + case ARM::ARMeh_return: { + // This pseudo instruction is generated as part of the lowering of + // ISD::EH_RETURN (c.f. ARMISelLowering.cpp) + // we convert it to a stack increment by OffsetReg and + // indirect jump to TargetReg + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned OffsetReg = MI.getOperand(0).getReg(); + unsigned TargetReg = MI.getOperand(1).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ADDrr), ARM::SP) + .addReg(OffsetReg) + .addReg(ARM::SP) + .addImm(Pred) + .addReg(PredReg) + .addReg(0); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BX)) + .addReg(TargetReg); + MI.eraseFromParent(); + return true; + } + case ARM::MOVGOTAddr : { + // Expand the pseudo-inst that requests for the GOT address + // to be materialized into a register. We use MOVW/MOVT for this. + // See ARMISelLowering.cpp for a comment on the strategy. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVi16PIC), + DstReg) + .addExternalSymbol("_GLOBAL_OFFSET_TABLE_", ARMII::MO_LO16); + + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::MOVTi16PIC)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg) + .addExternalSymbol("_GLOBAL_OFFSET_TABLE_", ARMII::MO_HI16); + + AddPICADD_MOVi16_PICID(MI, MBB, MBBI, true, + PredReg, Pred, DstReg, DstIsDead, LO16, HI16); + + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg); + HI16.addImm(Pred).addReg(PredReg); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + return true; + } + // @LOCALMOD-END } } @@ -1232,6 +1395,7 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { TRI = TM.getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); AFI = MF.getInfo<ARMFunctionInfo>(); + IsRelocPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_; bool Modified = false; for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5d45f64912..0b29c74ca2 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -537,6 +537,12 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { // Require VFP2 for loading fp constants. if (!Subtarget->hasVFP2()) return false; + // @LOCALMOD-START + // Don't use constant pools in NaCl. + if (FlagSfiDisableCP) + return false; + // @LOCALMOD-END + // MachineConstantPool wants an explicit alignment. 
unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); if (Align == 0) { @@ -589,6 +595,23 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { } } + // @LOCALMOD-START + // No constant pool, use movw+movt for 32-bit values. + if (FlagSfiDisableCP && Subtarget->hasV6T2Ops() && VT == MVT::i32) { + unsigned Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm; + const TargetRegisterClass *RC = isThumb2 ? + &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; + unsigned ImmReg = createResultReg(RC); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + ImmReg).addImm(CI->getZExtValue())); + return ImmReg; + } + + // Don't use constant pools in NaCl. + if (FlagSfiDisableCP) + return false; + // @LOCALMOD-END + // Load from constant pool. For now 32-bit only. if (VT != MVT::i32) return false; @@ -628,6 +651,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); + // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG. + const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); + bool IsThreadLocal = GVar && GVar->isThreadLocal(); + if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; + // Use movw+movt when possible, it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support @@ -649,6 +677,12 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg).addGlobalAddress(GV)); } else { + // @LOCALMOD-START + // Don't use constant pools in NaCl. + if (FlagSfiDisableCP) + return false; + // @LOCALMOD-END + // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(GV->getType()); if (Align == 0) { @@ -2961,13 +2995,22 @@ bool ARMFastISel::FastLowerArguments() { namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { - // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); - // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); - if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. + bool UseFastISel = false; + UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only(); + UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); + UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); + if (UseFastISel) { + // iOS always has a FP for backtracking, force other targets + // to keep their FP when doing FastISel. The emitted code is + // currently superior, and in cases like test-suite's lencod + // FastISel isn't quite correct when FP is eliminated. 
+ TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); + } return 0; } } diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 483802b130..0cff686481 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -25,6 +25,9 @@ #include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +// @LOCALMOD-START +#include "llvm/CodeGen/MachineModuleInfo.h" +// @LOCALMOD-END using namespace llvm; @@ -153,6 +156,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; int D8SpillFI = 0; + // @LOCALMOD-START + MachineModuleInfo &MMI = MF.getMMI(); + // This condition was gleaned from x86 / PowerPC / XCore + bool needsFrameMoves = STI.isTargetNaCl() && + (MMI.hasDebugInfo() || + !MF.getFunction()->doesNotThrow() || + MF.getFunction()->needsUnwindTableEntry()); + // @LOCALMOD-END + // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction()->getCallingConv() == CallingConv::GHC) @@ -212,6 +224,42 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Move past area 1. if (GPRCS1Size > 0) MBBI++; + // @LOCALMOD-START + if (needsFrameMoves && GPRCS1Size > 0) { + // we just skipped the initial callee save reg instructions, e.g. + // push {r4, r5, r6, lr} + // NOTE: this likely is not the right thing to do for darwin as it does not + // treat all callee save regs uniformly + MCSymbol *AfterRegSave = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(ARM::PROLOG_LABEL)).addSym(AfterRegSave); + // record the fact that the stack has moved + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(MachineLocation::VirtualFP, -GPRCS1Size); + MMI.getFrameMoves().push_back(MachineMove(AfterRegSave, dst, src)); + // for each callee saved register record where it has been saved + int offset = 0; + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::LR: + offset -= 4; + MachineLocation dst(MachineLocation::VirtualFP, offset); + MachineLocation src(Reg); + MMI.getFrameMoves().push_back(MachineMove(AfterRegSave, dst, src)); + break; + } + } + } + // @LOCALMOD-END + // Set FP to point to the stack slot that contains the previous FP. // For iOS, FP is R7, which has now been stored in spill area 1. // Otherwise, if this is not iOS, all the callee-saved registers go @@ -225,8 +273,29 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { .addFrameIndex(FramePtrSpillFI).addImm(0) .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); + // @LOCALMOD-START + if (needsFrameMoves) { + // we just emitted the fp pointer setup instruction, e.g. + // add r11, sp, #8 + MCSymbol *AfterFramePointerInit = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(ARM::PROLOG_LABEL)).addSym(AfterFramePointerInit); + // record the fact that the frame pointer is now tracking the "cfa" + // Note, gcc and llvm have a slightly different notion of where the + // frame pointer should be pointing. gcc points after the return address + // and llvm one word further down (two words = 8). + // This should be fine as long as we are consistent. 
+ // NOTE: this is related to the offset computed for + // ISD::FRAME_TO_ARGS_OFFSET + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(FramePtr, 8); + MMI.getFrameMoves().push_back(MachineMove(AfterFramePointerInit, dst, src)); + } + // @LOCALMOD-END } + + // Move past area 2. if (GPRCS2Size > 0) MBBI++; @@ -275,6 +344,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // an inconsistent state (pointing to the middle of callee-saved area). // The interrupt handler can end up clobbering the registers. AFI->setShouldRestoreSPFromFP(true); + + // @LOCALMOD-START + // we only track sp changes if do not have the fp to figure out where + // stack frame lives + if (needsFrameMoves && !HasFP) { + MCSymbol *AfterStackUpdate = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, + TII.get(ARM::PROLOG_LABEL)).addSym(AfterStackUpdate); + MachineLocation dst(MachineLocation::VirtualFP); + MachineLocation src(MachineLocation::VirtualFP, - NumBytes - GPRCS1Size); + MMI.getFrameMoves().push_back(MachineMove(AfterStackUpdate, dst, src)); + } + // @LOCALMOD-END } if (STI.isTargetELF() && hasFP(MF)) @@ -670,7 +752,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps() && + !STI.isTargetNaCl() /* @LOCALMOD */) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index efa255a557..9838e1e9d1 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -27,7 +27,8 @@ protected: public: explicit ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4, + 4), // @LOCALMOD STI(sti) { } diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9e1782e119..fb4f190186 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -36,8 +36,17 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +// @LOCALMOD-START +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; +} +// @LOCALMOD-END + using namespace llvm; + static cl::opt<bool> DisableShifterOp("disable-shifter-op", cl::Hidden, cl::desc("Disable isel of shifter-op"), @@ -112,21 +121,24 @@ public: bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, + AddrMode2Type SelectAddrMode2Worker(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2Base(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; + return SelectAddrMode2Worker(Op, N, Base, Offset, Opc) == AM2_BASE; } - bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2ShOp(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, 
Offset, Opc) == AM2_SHOP; + return SelectAddrMode2Worker(Op, N, Base, Offset, Opc) == AM2_SHOP; } - bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, + bool SelectAddrMode2(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { - SelectAddrMode2Worker(N, Base, Offset, Opc); + SelectAddrMode2Worker(Op, N, Base, Offset, Opc); // return SelectAddrMode2ShOp(N, Base, Offset, Opc); // This always matches one way or another. return true; @@ -139,7 +151,7 @@ public: bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrOffsetNone(SDValue N, SDValue &Base); - bool SelectAddrMode3(SDValue N, SDValue &Base, + bool SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); bool SelectAddrMode3Offset(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); @@ -521,6 +533,22 @@ bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, return true; } +// @LOCALMOD-START +static bool ShouldOperandBeUnwrappedForUseAsBaseAddress( + SDValue& N, const ARMSubtarget* Subtarget) { + assert (N.getOpcode() == ARMISD::Wrapper); + // Never use this transformation if constant island pools are disallowed + if (FlagSfiDisableCP) return false; + + // always apply this when we do not have movt/movw available + // (if we do have movt/movw we be able to get rid of the + // constant pool entry altogether) + if (!Subtarget->useMovt()) return true; + // explain why we do not want to use this for TargetGlobalAddress + if (N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) return true; + return false; +} +// @LOCALMOD-END bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, @@ -539,8 +567,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } else Base = N; @@ -574,6 +602,11 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { + // @LOCALMOD-BEGIN + // Disallow offsets of Reg + Reg (which may escape sandbox). + if (Subtarget->isTargetNaCl()) + return false; + // @LOCALMOD-END if (N.getOpcode() == ISD::MUL && ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -669,10 +702,24 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, //----- -AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, +AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDNode *Op, + SDValue N, SDValue &Base, SDValue &Offset, +// @LOCALMOD-START +// Note: In the code below we do not want "Offset" to be real register to +// not violate ARM sandboxing. +// @LOCALMOD-END SDValue &Opc) { + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + // This is neither a sandboxable load nor a sandboxable store. 
+ if (!restrict_addressing_modes_for_nacl) { + // @LOCALMOD-END + if (N.getOpcode() == ISD::MUL && (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { @@ -696,6 +743,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, } } } + } // @LOCALMOD if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && // ISD::OR that is equivalent to an ADD. @@ -705,8 +753,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -739,7 +787,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, return AM2_BASE; } } - + if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { // Compute R +/- (R << N) and reuse it. Base = N; @@ -749,6 +797,24 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, MVT::i32); return AM2_BASE; } + + // @LOCALMOD-START + // Keep load and store addressing modes simple + if (restrict_addressing_modes_for_nacl) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } + // @LOCALMOD-END // Otherwise this is R +/- [possibly shifted] R. ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; @@ -817,13 +883,27 @@ bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) return false; + // @LOCALMOD-BEGIN + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + // @LOCALMOD-END + + Offset = N; ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); unsigned ShAmt = 0; if (ShOpcVal != ARM_AM::no_shift) { // Check to see if the RHS of the shift is a constant, if not, we can't fold // it. - if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + + //if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1)); + // @LOCALMOD-BEGIN + // Neither a sandboxable load nor a sandboxable store. 
+ if (!restrict_addressing_modes_for_nacl && Sh ) { + // @LOCALMOD-END ShAmt = Sh->getZExtValue(); if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) Offset = N.getOperand(0); @@ -886,16 +966,25 @@ bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { return true; } -bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, +bool ARMDAGToDAGISel::SelectAddrMode3(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc) { + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + (FlagSfiLoad && (Op->getOpcode() == ISD::LOAD)) || + (FlagSfiStore && (Op->getOpcode() == ISD::STORE)); + if (!restrict_addressing_modes_for_nacl) { + // @LOCALMOD-END if (N.getOpcode() == ISD::SUB) { + // X - C is canonicalize to X + -C, no need to handle it here. Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0),MVT::i32); return true; } + } // @LOCALMOD-END if (!CurDAG->isBaseWithConstantOffset(N)) { Base = N; @@ -928,6 +1017,16 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, return true; } + // @LOCALMOD-START + // A sandboxable load or a sandboxable store. + if (restrict_addressing_modes_for_nacl) { + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); + return true; + } + // @LOCALMOD-END + Base = N.getOperand(0); Offset = N.getOperand(1); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32); @@ -962,8 +1061,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, int FI = cast<FrameIndexSDNode>(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && - !(Subtarget->useMovt() && - N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + // @LOCALMOD + ShouldOperandBeUnwrappedForUseAsBaseAddress(N, Subtarget)) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -2583,6 +2682,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. 
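The restrict_addressing_modes_for_nacl checks threaded through SelectAddrMode2/SelectAddrMode3 above keep sandboxable loads and stores in base-plus-immediate form. A sketch of why (illustrative only; the bic mask shown is the usual NaCl ARM data mask, not something introduced by this hunk):

    //   ldr r0, [r1, r2, lsl #2]   ; base + index: the effective address
    //                              ; cannot be masked as one unit
    //
    // With a plain base register, the later SFI rewrite can pair every
    // access with a mask of that base inside the same bundle:
    //
    //   bic r1, r1, #0xc0000000    ; clamp the base into the sandbox range
    //   ldr r0, [r1, #offset]      ; small immediate offsets stay in range
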
} + if (FlagSfiDisableCP) UseCP = false; // @LOCALMOD + if (UseCP) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e49cfc4985..8bc6a215fe 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -48,6 +48,15 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" + +// @LOCALMOD-START +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -255,8 +264,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::SHL_I128, 0); setLibcallName(RTLIB::SRL_I128, 0); setLibcallName(RTLIB::SRA_I128, 0); - - if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { + // @LOCALMOD: use standard names and calling conventions for pnacl + if (!Subtarget->isTargetNaCl() && Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) { // Double-precision floating-point arithmetic helper functions // RTABI chapter 4.1.2, Table 2 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); @@ -701,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - + // @LOCALMOD-START + if (!Subtarget->useInlineJumpTables()) + setOperationAction(ISD::JumpTable, MVT::i32, Custom); + // @LOCALMOD-END + setOperationAction(ISD::TRAP, MVT::Other, Legal); // Use the default implementation. @@ -717,8 +730,22 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // personality function. setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl()) { + // we use the first caller saved regs here + // c.f.: llvm-gcc/llvm-gcc-4.2/gcc/unwind-dw2.c::uw_install_context + // NOTE: these are related to the _Unwind_PNaClSetResult{0,1} functions + setExceptionPointerRegister(ARM::R4); + setExceptionSelectorRegister(ARM::R5); + + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); + + setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); + } else { + setExceptionPointerRegister(ARM::R0); + setExceptionSelectorRegister(ARM::R1); + } + // @LOCALMOD-END } setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); @@ -804,8 +831,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Custom); - + // @LOCALMOD-START + //setOperationAction(ISD::BR_JT, MVT::Other, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, + Subtarget->useInlineJumpTables() ? 
Custom : Expand); + // @LOCALMOD-END + // We don't support sin/cos/fmod/copysign/pow setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); @@ -844,6 +875,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TARGET_ARCH, MVT::i32, Custom); + } + // @LOCALMOD-END + // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -936,6 +975,10 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + // @LOCALMOD-START + case ARMISD::WrapperJT2: return "ARMISD::WrapperJT2"; + case ARMISD::EH_RETURN: return "ARMISD::EH_RETURN"; + // @LOCALMOD-END case ARMISD::CALL: return "ARMISD::CALL"; case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; @@ -1752,6 +1795,29 @@ ARMTargetLowering::HandleByVal( State->getCallOrPrologue() == Call) && "unhandled ParmContext"); + + // @LOCALMOD-BEGIN + // The original mechanism tries to split a byval argument between registers + // and the stack. It doesn't work correctly yet, so disable it. + // This leaves the entire byval argument on the stack, and the rest + // of the parameters will need to be on the stack as well, to have + // the correct order for var-args. We remember the fact that there was + // a byval param that forced this, so that we know not to use the + // handle var-args reg-save area. + // PR11018. + if (Subtarget->isTargetNaCl()) { + if ((!State->isFirstByValRegValid()) && + (ARM::R0 <= reg) && (reg <= ARM::R3)) { + State->setHasByValInRegPosition(); + } + // Confiscate any remaining parameter registers to preclude their + // assignment to subsequent parameters. + while (State->AllocateReg(GPRArgRegs, 4)) + ; + return; + } + // @LOCALMOD-END + // For in-prologue parameters handling, we also introduce stack offset // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. // This behaviour outsides AAPCS rules (5.5 Parameters Passing) of how @@ -1815,6 +1881,7 @@ ARMTargetLowering::HandleByVal( } } } + } /// MatchingStackOffset - Return true if the given stack call argument is @@ -2199,7 +2266,14 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { } unsigned ARMTargetLowering::getJumpTableEncoding() const { - return MachineJumpTableInfo::EK_Inline; + // @LOCALMOD-BEGIN + if (Subtarget->useInlineJumpTables()) { + return MachineJumpTableInfo::EK_Inline; + } else { + // TODO: Find a better way to call the super-class. 
+ return TargetLowering::getJumpTableEncoding(); + } + // @LOCALMOD-END } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, @@ -2232,28 +2306,120 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } +// @LOCALMOD-START +// more conventional jumptable implementation +SDValue ARMTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + assert(!Subtarget->useInlineJumpTables() && + "inline jump tables not custom lowered"); + const DebugLoc dl = Op.getDebugLoc(); + EVT PTy = getPointerTy(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + return DAG.getNode(ARMISD::WrapperJT2, dl, MVT::i32, JTI); +} + +////////////////////////////////////////////////////////////////////// +// NaCl TLS setup / layout intrinsics. +// See: native_client/src/untrusted/stubs/tls_params.h +SDValue ARMTargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tls_offset (size_t tls_size) { + // return 8; + // } + return DAG.getConstant(8, Op.getValueType().getSimpleVT()); +} + +SDValue ARMTargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tdb_offset (size_t tdb_size) { + // return -tdb_size; + // } + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + +SDValue +ARMTargetLowering::LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const { + // size_t __nacl_target_arch () { + // return PnaclTargetArchitectureARM_32; + // } + return DAG.getConstant(PnaclTargetArchitectureARM_32, + Op.getValueType().getSimpleVT()); +} + +////////////////////////////////////////////////////////////////////// + +// @LOCALMOD-END + // Lower ISD::GlobalTLSAddress using the "general dynamic" model SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); - unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, - ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); - SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); - Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue Chain = Argument.getValue(1); + // @LOCALMOD-BEGIN + SDValue Chain; + SDValue Argument; + + if (FlagSfiDisableCP) { + // With constant pools "disabled" (moved to rodata), this constant pool + // entry is no longer in text, and simultaneous PC relativeness + // and CP Addr relativeness is no longer expressible. + // So, instead of having: + // + // .LCPI12_0: + // .long var(tlsgd)-((.LPC12_0+8) - .) + // ... + // ldr r2, .LCPI12_0 + // .LPC12_0: + // add r0, pc, r2 + // + // we have: + // + // .LCPI12_0: + // .long var(tlsgd) + // ... + // // get addr of .LCPI12_0 into r2 + // ldr r0, [r2] + // add r0, r2, r0 + // (1) No longer subtracting pc, so no longer adding that back + // (2) Not adding "." in the CP entry, so adding it via instructions. 
+ // + unsigned char PCAdj = 0; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, + false); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Argument.getValue(1); + Argument = DAG.getNode(ISD::ADD, dl, PtrVT, Argument, CPAddr); + } else { // sort of @LOCALMOD-END + unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); + Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); // @ LOCALMOD + Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); + Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Argument.getValue(1); // @LOCALMOD - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); + } // @LOCALMOD-END // call __tls_get_addr. ArgListTy Args; @@ -2290,25 +2456,49 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - // Initial exec model. - unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = + + // @LOCALMOD-BEGIN + if (FlagSfiDisableCP) { + // Similar to change to LowerToTLSGeneralDynamicModel, and + // for the same reason. + unsigned char PCAdj = 0; + ARMConstantPoolValue *CPV = + ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, + ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, + false); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Offset = DAG.getLoad(PtrVT, dl, Chain, CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Offset.getValue(1); + + Offset = DAG.getNode(ISD::ADD, dl, PtrVT, Offset, CPAddr); + + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + } else { // sort of @LOCALMOD-END (indentation) + // Initial exec model. + unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); - Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); - Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - Chain = Offset.getValue(1); + Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); + Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + Chain = Offset.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + } // @LOCALMOD-END } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2458,17 +2648,55 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); - unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + + // @LOCALMOD-BEGIN + if (FlagSfiDisableCP) { + // With constant pools "disabled" (moved to rodata), the constant pool + // entry is no longer in text, and the PC relativeness is + // no longer expressible. + // + // Instead of having: + // + // .LCPI12_0: + // .long _GLOBAL_OFFSET_TABLE_-(.LPC12_0+8) + // ... + // ldr r2, .LCPI12_0 + // .LPC12_0: + // add r0, pc, r2 + // + // Things to try: + // (1) get the address of the GOT through a pc-relative MOVW / MOVT. + // + // movw r0, :lower16:_GLOBAL_OFFSET_TABLE_ - (.LPC12_0 + 8) + // movt r0, :upper16:_GLOBAL_OFFSET_TABLE_ - (.LPC12_0 + 8) + // .LPC12_0: + // add r0, pc, r0 + // + // (2) Make the constant pool entry relative to its own location + // + // .LCPI12_0: + // .long _GLOBAL_OFFSET_TABLE_-. + // ... + // // get address of LCPI12_0 into r0 (possibly 3 instructions for PIC) + // ldr r1, [r0] + // add r1, r0, r1 + // + // We will try (1) for now, since (2) takes about 3 more instructions + // (and one of them is a load). + return DAG.getNode(ARMISD::WrapperGOT, dl, MVT::i32); + } else { // Sort of LOCALMOD-END (indentation only + unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); - return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, false, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + } // @LOCALMOD-END } SDValue @@ -2494,6 +2722,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::nacl_read_tp: // @LOCALMOD case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -2638,6 +2867,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned RBegin, REnd; CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); NumGPRs = REnd - RBegin; + // @LOCALMOD-BEGIN + } else if (Subtarget->isTargetNaCl() && CCInfo.hasByValInRegPosition()) + NumGPRs = 0; + // @LOCALMOD-END } else { unsigned int firstUnalloced; firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, @@ -2688,6 +2921,10 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); firstRegToSaveIndex = RBegin - ARM::R0; lastRegToSaveIndex = REnd - ARM::R0; + // @LOCALMOD-BEGIN + } else if (Subtarget->isTargetNaCl() && CCInfo.hasByValInRegPosition()) + firstRegToSaveIndex = 4; // Nothing to save. + // @LOCALMOD-END } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); @@ -2781,7 +3018,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); - + SmallVector<SDValue, 16> ArgValues; int lastInsIndex = -1; SDValue ArgValue; @@ -5459,6 +5696,38 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } +// @LOCALMOD-START +// An EH_RETURN is the result of lowering llvm.eh.return.i32 which in turn is +// generated from __builtin_eh_return (offset, handler) +// The effect of this is to adjust the stack pointer by "offset" +// and then branch to "handler". +SDValue ARMTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) + const { + SDValue Chain = Op.getOperand(0); + SDValue Offset = Op.getOperand(1); + SDValue Handler = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Store stack offset in R2, jump target in R3, dummy return value in R0 + // The dummy return value is needed to make the use-def chains happy, + // because the EH_RETURN instruction uses the isReturn attribute, which + // means preceding code needs to define the return register (R0 on ARM). 
+ // http://code.google.com/p/nativeclient/issues/detail?id=2643 + unsigned OffsetReg = ARM::R2; + unsigned AddrReg = ARM::R3; + unsigned ReturnReg = ARM::R0; + Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset); + Chain = DAG.getCopyToReg(Chain, dl, AddrReg, Handler); + Chain = DAG.getCopyToReg(Chain, dl, ReturnReg, DAG.getIntPtrConstant(0)); + return DAG.getNode(ARMISD::EH_RETURN, dl, + MVT::Other, + Chain, + DAG.getRegister(OffsetReg, MVT::i32), + DAG.getRegister(AddrReg, getPointerTy())); +} +// @LOCALMOD-END + + static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { // Convert to float @@ -5704,6 +5973,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); + case ISD::JumpTable: return LowerJumpTable(Op, DAG); // @LOCALMOD case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); @@ -5722,6 +5992,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + // @LOCALMOD-START + // The exact semantics of this ISD are not completely clear. + // LLVM seems to always point the fp after the push ra and the old fp, i.e. + // two register slots after the beginning of the stack frame. + // It is not clear what happens when there is no frame pointer but + // but llvm unlike gcc seems to always force one when this node is + // encountered. + case ISD::FRAME_TO_ARGS_OFFSET: return DAG.getIntPtrConstant(2*4); + case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); + // @LOCALMOD-END + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); @@ -5753,6 +6034,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + case ISD::NACL_TARGET_ARCH: return LowerNaClTargetArch(Op, DAG); + // @LOCALMOD-END } } @@ -7032,7 +7318,11 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) .addReg(VReg1) .addImm(LoopSize >> 16)); - } else { + } else if (FlagSfiDisableCP) { // @LOCALMOD-START + BuildMI(BB, dl, TII->get(ARM::MOVi32imm)) + .addReg(varEnd, RegState::Define) + .addImm(LoopSize); + } else { // @LOCALMOD-END MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); @@ -10017,6 +10307,16 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, if (Subtarget->isThumb1Only()) return false; + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + ((FlagSfiLoad && N->getOpcode() == ISD::LOAD) || + (FlagSfiStore && N->getOpcode() == ISD::STORE)); + if 
(restrict_addressing_modes_for_nacl) { + return false; + } + // @LOCALMOD-END + EVT VT; SDValue Ptr; bool isSEXTLoad = false; @@ -10055,7 +10355,15 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, SelectionDAG &DAG) const { if (Subtarget->isThumb1Only()) return false; - + // @LOCALMOD-START + // Avoid two reg addressing mode for loads and stores + const bool restrict_addressing_modes_for_nacl = + ((FlagSfiLoad && N->getOpcode() == ISD::LOAD) || + (FlagSfiStore && N->getOpcode() == ISD::STORE)); + if (restrict_addressing_modes_for_nacl) { + return false; + } + // @LOCALMOD-END EVT VT; SDValue Ptr; bool isSEXTLoad = false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 426010e295..3a8cb0be8a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -40,10 +40,13 @@ namespace llvm { WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in // PIC mode. WrapperJT, // WrapperJT - A wrapper node for TargetJumpTable - + // @LOCALMOD-START + WrapperJT2, // like WrapperJT but without the UID + WrapperGOT, // A Wrapper node for GOT addresses + EH_RETURN, // For LowerEH_RETURN + // @LOCALMOD-END // Add pseudo op to model memcpy for struct byval. COPY_STRUCT_BYVAL, - CALL, // Function call. CALL_PRED, // Function call that's predicable. CALL_NOLINK, // Function call with branch not branch-and-link. @@ -442,6 +445,14 @@ namespace llvm { SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const; + // @LOCALMOD-START + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 1bd174e341..4a201a7459 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -95,6 +95,14 @@ def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; +// @LOCALMOD-START +// support non-inline jumptables +// we do not use the extre uid immediate that comes with ARMWrapperJT +// TODO(robertm): figure out what it is used for +def ARMWrapperJT2 : SDNode<"ARMISD::WrapperJT2", SDTIntUnaryOp>; +// Support for MOVW/MOVT'ing the GOT address directly into a register. 
+def ARMWrapperGOT : SDNode<"ARMISD::WrapperGOT", SDTPtrLeaf>; +// @LOCALMOD-END def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; @@ -242,6 +250,7 @@ def IsARM : Predicate<"!Subtarget->isThumb()">, def IsIOS : Predicate<"Subtarget->isTargetIOS()">; def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; @@ -278,6 +287,11 @@ def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONFor def IsLE : Predicate<"TLI.isLittleEndian()">; def IsBE : Predicate<"TLI.isBigEndian()">; +// @LOCALMOD-BEGIN +def UseConstPool : Predicate<"Subtarget->useConstPool()">; +def DontUseConstPool : Predicate<"!Subtarget->useConstPool()">; +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -835,7 +849,8 @@ def postidx_reg : Operand<i32> { // use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } def addrmode2 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode2", []> { + ComplexPattern<i32, 3, "SelectAddrMode2", [], + [SDNPWantRoot]> { // @LOCALMOD let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; let ParserMatchClass = AddrMode2AsmOperand; @@ -874,8 +889,9 @@ def am2offset_imm : Operand<i32>, // // FIXME: split into imm vs. reg versions. def AddrMode3AsmOperand : AsmOperandClass { let Name = "AddrMode3"; } -class AddrMode3 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode3", []> { +def AddrMode3 : Operand<i32>, + ComplexPattern<i32, 3, "SelectAddrMode3", [], + [SDNPWantRoot]> { // @LOCALMOD let EncoderMethod = "getAddrMode3OpValue"; let ParserMatchClass = AddrMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 4dacb86df4..3ce6c7e1ec 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3539,12 +3539,24 @@ def t2MOV_ga_dyn : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, Requires<[IsThumb2, DontUseMovt]>; +// @LOCALMOD-START +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>, + Requires<[IsThumb2, DontUseMovt]>; +// @LOCALMOD-END def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, Requires<[IsThumb2, UseMovt]>; def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), (t2LEApcrelJT tjumptable:$dst, imm:$id)>; +// @LOCALMOD-START +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2MOVi32imm tconstpool :$dst)>, + Requires<[IsThumb2, UseMovt, DontUseConstPool]>; +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>, + Requires<[IsThumb2, UseMovt, UseConstPool]>; +def : T2Pat<(ARMWrapperJT2 tjumptable :$dst), (t2MOVi32imm tjumptable :$dst)>, + Requires<[IsThumb2, UseMovt]>; +// @LOCALMOD-END // Pseudo instruction that combines ldr from constpool and add pc. 
This should // be expanded into two instructions late to allow if-conversion and diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index c8ed5760f9..5040ade360 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -707,6 +707,7 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, /// ldmia rn, <ra, rb, rc> /// => /// ldmdb rn!, <ra, rb, rc> +/// @LOCALMOD This is especially useful for rn == sp bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool &Advance, @@ -1402,7 +1403,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { /// mov pc, lr /// => /// ldmfd sp!, {..., pc} +// @LOCALMOD for sfi we do not want this to happen bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { + // @LOCALMOD-START + // For NaCl, do not load into PC directly for a return, since NaCl requires + // masking the address first. + if (STI->isTargetNaCl()) { + return false; + } + // @LOCALMOD-END + if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index b641483200..5a65efe3b5 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -123,3 +123,57 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, OutMI.addOperand(MCOp); } } + +// @LOCALMOD-BEGIN +// Unlike LowerARMMachineInstrToMCInst, the opcode has already been set. +// Otherwise, this is like LowerARMMachineInstrToMCInst, but with special +// handling where the "immediate" is PC Relative +// (used for MOVi16PIC / MOVTi16PIC, etc. -- see .td file) +void llvm::LowerARMMachineInstrToMCInstPCRel(const MachineInstr *MI, + MCInst &OutMI, + ARMAsmPrinter &AP, + unsigned ImmIndex, + unsigned PCIndex, + MCSymbol *PCLabel, + unsigned PCAdjustment) { + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (i == ImmIndex) { + MCContext &Ctx = AP.OutContext; + const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, Ctx); + if (PCAdjustment) { + const MCExpr *AdjExpr = MCConstantExpr::Create(PCAdjustment, Ctx); + PCRelExpr = MCBinaryExpr::CreateAdd(PCRelExpr, AdjExpr, Ctx); + } + + // Get the usual symbol operand, then subtract the PCRelExpr. 
+ const MachineOperand &MOImm = MI->getOperand(ImmIndex); + MCOperand SymOp; + bool DidLower = AP.lowerOperand(MOImm, SymOp); + assert (DidLower && "Immediate-like operand should have been lowered"); + + const MCExpr *Expr = SymOp.getExpr(); + ARMMCExpr::VariantKind TargetKind = ARMMCExpr::VK_ARM_None; + /* Unwrap and rewrap the ARMMCExpr */ + if (Expr->getKind() == MCExpr::Target) { + const ARMMCExpr *TargetExpr = cast<ARMMCExpr>(Expr); + TargetKind = TargetExpr->getKind(); + Expr = TargetExpr->getSubExpr(); + } + Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, Ctx); + if (TargetKind != ARMMCExpr::VK_ARM_None) { + Expr = ARMMCExpr::Create(TargetKind, Expr, Ctx); + } + MCOperand MCOp = MCOperand::CreateExpr(Expr); + OutMI.addOperand(MCOp); + } else if (i == PCIndex) { // dummy index already handled as PCLabel + continue; + } else { + MCOperand MCOp; + if (AP.lowerOperand(MI->getOperand(i), MCOp)) { + OutMI.addOperand(MCOp); + } + } + } +} +// @LOCALMOD-END diff --git a/lib/Target/ARM/ARMNaClHeaders.cpp b/lib/Target/ARM/ARMNaClHeaders.cpp new file mode 100644 index 0000000000..a0b89ab05f --- /dev/null +++ b/lib/Target/ARM/ARMNaClHeaders.cpp @@ -0,0 +1,176 @@ +//===-- ARMNaClHeaders.cpp - Print SFI headers to an ARM .s file -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the initial header string needed +// for the Native Client target in ARM assembly. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_ostream.h" +#include "ARMNaClRewritePass.h" +#include <string> + +using namespace llvm; + +void EmitSFIHeaders(raw_ostream &O) { + O << " @ ========================================\n"; + O << "@ Branch: " << FlagSfiBranch << "\n"; + O << "@ Stack: " << FlagSfiStack << "\n"; + O << "@ Store: " << FlagSfiStore << "\n"; + O << "@ Data: " << FlagSfiData << "\n"; + + O << " @ ========================================\n"; + // NOTE: this macro does bundle alignment as follows + // if current bundle pos is X emit pX data items of value "val" + // NOTE: that pos will be one of: 0,4,8,12 + // + O << + "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n" + "\t.set pos, (. 
- XmagicX) % 16\n" + "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_illegal_if_at_bundle_begining\n" + "\tsfi_long_based_on_pos 1 0 0 0 0xe125be70\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nop_if_at_bundle_end\n" + "\tsfi_long_based_on_pos 0 0 0 1 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot3\n" + "\tsfi_long_based_on_pos 3 2 1 0 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot2\n" + "\tsfi_long_based_on_pos 2 1 0 3 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot1\n" + "\tsfi_long_based_on_pos 1 0 3 2 0xe320f000\n" + "\t.endm\n" + "\n\n"; + + O << " @ ========================================\n"; + if (FlagSfiZeroMask) { + // This mode sets all mask to zero which makes them into nops + // this is useful for linking this code against non-sandboxed code + // for debugging purposes + O << + "\t.macro sfi_data_mask reg cond\n" + "\tbic\\cond \\reg, \\reg, #0\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_data_tst reg\n" + "\ttst \\reg, #0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_code_mask reg cond=\n" + "\tbic\\cond \\reg, \\reg, #0\n" + "\t.endm\n" + "\n\n"; + + } else { + O << + "\t.macro sfi_data_mask reg cond\n" + "\tbic\\cond \\reg, \\reg, #0xc0000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_data_tst reg\n" + "\ttst \\reg, #0xc0000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_code_mask reg cond=\n" + "\tbic\\cond \\reg, \\reg, #0xc000000f\n" + "\t.endm\n" + "\n\n"; + } + + O << " @ ========================================\n"; + if (FlagSfiBranch) { + O << + "\t.macro sfi_call_preamble cond=\n" + "\tsfi_nops_to_force_slot3\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_return_preamble reg cond=\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\reg \\cond\n" + "\t.endm\n" + "\n\n"; + + // This is used just before "bx rx" + O << + "\t.macro sfi_indirect_jump_preamble link cond=\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\link \\cond\n" + "\t.endm\n" + "\n\n"; + + // This is use just before "blx rx" + O << + "\t.macro sfi_indirect_call_preamble link cond=\n" + "\tsfi_nops_to_force_slot2\n" + "\tsfi_code_mask \\link \\cond\n" + "\t.endm\n" + "\n\n"; + + } + + if (FlagSfiStore) { + O << " @ ========================================\n"; + + O << + "\t.macro sfi_load_store_preamble reg cond\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_data_mask \\reg, \\cond\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_cstore_preamble reg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_data_tst \\reg\n" + "\t.endm\n" + "\n\n"; + } else { + O << + "\t.macro sfi_load_store_preamble reg cond\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_cstore_preamble reg cond\n" + "\t.endm\n" + "\n\n"; + } + + O << " @ ========================================\n"; + O << "\t.text\n"; +} diff --git a/lib/Target/ARM/ARMNaClRewritePass.cpp b/lib/Target/ARM/ARMNaClRewritePass.cpp new file mode 100644 index 0000000000..505521cdc6 --- /dev/null +++ b/lib/Target/ARM/ARMNaClRewritePass.cpp @@ -0,0 +1,1108 @@ +//===-- ARMNaClRewritePass.cpp - Native Client Rewrite Pass ------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Native Client Rewrite Pass +// This final pass inserts the sandboxing instructions needed to run inside +// the Native Client sandbox. Native Client requires certain software fault +// isolation (SFI) constructions to be put in place, to prevent escape from +// the sandbox. Native Client refuses to execute binaries without the correct +// SFI sequences. +// +// Potentially dangerous operations which are protected include: +// * Stores +// * Branches +// * Changes to SP +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-sfi" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMNaClRewritePass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" +#include <set> +#include <stdio.h> + +using namespace llvm; + +namespace llvm { + +cl::opt<bool> +FlagSfiData("sfi-data", cl::desc("use illegal at data bundle beginning")); + +cl::opt<bool> +FlagSfiLoad("sfi-load", cl::desc("enable sandboxing for load")); + +cl::opt<bool> +FlagSfiStore("sfi-store", cl::desc("enable sandboxing for stores")); + +cl::opt<bool> +FlagSfiStack("sfi-stack", cl::desc("enable sandboxing for stack changes")); + +cl::opt<bool> +FlagSfiBranch("sfi-branch", cl::desc("enable sandboxing for branches")); + +cl::opt<bool> +FlagNaClUseM23ArmAbi("nacl-use-m23-arm-abi", + cl::desc("use the Chrome M23 ARM ABI")); + +} + +namespace { + class ARMNaClRewritePass : public MachineFunctionPass { + public: + static char ID; + ARMNaClRewritePass() : MachineFunctionPass(ID) {} + + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM Native Client Rewrite Pass"; + } + + private: + + bool SandboxMemoryReferencesInBlock(MachineBasicBlock &MBB); + void SandboxMemory(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx, + bool IsLoad); + + bool SandboxBranchesInBlock(MachineBasicBlock &MBB); + bool SandboxStackChangesInBlock(MachineBasicBlock &MBB); + + void SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + void LightweightVerify(MachineFunction &MF); + }; + char ARMNaClRewritePass::ID = 0; +} + +static bool IsReturn(const MachineInstr &MI) { + return (MI.getOpcode() == ARM::BX_RET); +} + +static bool IsIndirectJump(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: return false; + case ARM::BX: + case ARM::TAILJMPr: + return true; + } +} + +static bool IsIndirectCall(const MachineInstr &MI) { + return MI.getOpcode() == ARM::BLX; +} + +static bool IsDirectCall(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: return false; + case ARM::BL: + case ARM::BL_pred: + case ARM::TPsoft: + return true; + } +} + +static void DumpInstructionVerbose(const MachineInstr &MI) { + DEBUG({ + dbgs() << MI; + dbgs() << MI.getNumOperands() << " operands:" << "\n"; + for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + const MachineOperand& op = MI.getOperand(i); + dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n"; + } + dbgs() << "\n"; + }); +} + +static void DumpBasicBlockVerbose(const MachineBasicBlock &MBB) { + DEBUG({ + 
dbgs() << "\n<<<<< DUMP BASIC BLOCK START\n"; + for (MachineBasicBlock::const_iterator + MBBI = MBB.begin(), MBBE = MBB.end(); + MBBI != MBBE; + ++MBBI) { + DumpInstructionVerbose(*MBBI); + } + dbgs() << "<<<<< DUMP BASIC BLOCK END\n\n"; + }); +} + +/**********************************************************************/ +/* Exported functions */ + +namespace ARM_SFI { + +bool IsStackChange(const MachineInstr &MI, const TargetRegisterInfo *TRI) { + return MI.modifiesRegister(ARM::SP, TRI); +} + +bool NextInstrMasksSP(const MachineInstr &MI) { + MachineBasicBlock::const_iterator It = &MI; + const MachineBasicBlock *MBB = MI.getParent(); + + MachineBasicBlock::const_iterator next = ++It; + if (next == MBB->end()) { + return false; + } + + const MachineInstr &next_instr = *next; + unsigned opcode = next_instr.getOpcode(); + return (opcode == ARM::SFI_DATA_MASK) && + (next_instr.getOperand(0).getReg() == ARM::SP); +} + +bool IsSandboxedStackChange(const MachineInstr &MI) { + // Calls do not change the stack on ARM but they have implicit-defs, so + // make sure they do not get sandboxed. + if (MI.getDesc().isCall()) + return true; + + unsigned opcode = MI.getOpcode(); + switch (opcode) { + default: break; + + // Our mask instructions correctly update the stack pointer. + case ARM::SFI_DATA_MASK: + return true; + + // These just bump SP by a little (and access the stack), + // so that is okay due to guard pages. + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + return true; + + // Similar, unless it is a load into SP... + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: { + bool dest_SP = false; + // Dest regs start at operand index 4. + for (unsigned i = 4; i < MI.getNumOperands(); ++i) { + const MachineOperand &DestReg = MI.getOperand(i); + dest_SP = dest_SP || (DestReg.getReg() == ARM::SP); + } + if (dest_SP) { + break; + } + return true; + } + + // Some localmods *should* prevent selecting a reg offset + // (see SelectAddrMode2 in ARMISelDAGToDAG.cpp). + // Otherwise, the store is already a potential violation. + case ARM::STR_PRE_REG: + case ARM::STR_PRE_IMM: + + case ARM::STRH_PRE: + + case ARM::STRB_PRE_REG: + case ARM::STRB_PRE_IMM: + return true; + + // Similar, unless it is a load into SP... + case ARM::LDRi12: + case ARM::LDR_PRE_REG: + case ARM::LDR_PRE_IMM: + case ARM::LDRH_PRE: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_PRE_IMM: + case ARM::LDRSH_PRE: + case ARM::LDRSB_PRE: { + const MachineOperand &DestReg = MI.getOperand(0); + if (DestReg.getReg() == ARM::SP) { + break; + } + return true; + } + + // Here, if SP is the base / write-back reg, we need to check if + // a reg is used as offset (otherwise it is not a small nudge). + case ARM::STR_POST_REG: + case ARM::STR_POST_IMM: + case ARM::STRH_POST: + case ARM::STRB_POST_REG: + case ARM::STRB_POST_IMM: { + const MachineOperand &WBReg = MI.getOperand(0); + const MachineOperand &OffReg = MI.getOperand(3); + if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) { + break; + } + return true; + } + + // Similar, but also check that DestReg is not SP. 
+ case ARM::LDR_POST_REG: + case ARM::LDR_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRH_POST: + case ARM::LDRSH_POST: + case ARM::LDRSB_POST: { + const MachineOperand &DestReg = MI.getOperand(0); + if (DestReg.getReg() == ARM::SP) { + break; + } + const MachineOperand &WBReg = MI.getOperand(1); + const MachineOperand &OffReg = MI.getOperand(3); + if (WBReg.getReg() == ARM::SP && OffReg.getReg() != 0) { + break; + } + return true; + } + } + + return (NextInstrMasksSP(MI)); +} + +bool NeedSandboxStackChange(const MachineInstr &MI, + const TargetRegisterInfo *TRI) { + return (IsStackChange(MI, TRI) && !IsSandboxedStackChange(MI)); +} + +} // namespace ARM_SFI + +/**********************************************************************/ + +void ARMNaClRewritePass::getAnalysisUsage(AnalysisUsage &AU) const { + // Slight (possibly unnecessary) efficiency tweak: + // Promise not to modify the CFG. + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/* + * A primitive validator to catch problems at compile time. + * E.g., it could be used along with bugpoint to reduce a bitcode file. + */ +void ARMNaClRewritePass::LightweightVerify(MachineFunction &MF) { + DEBUG({ + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); + MFI != MFE; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); + MBBI != MBBE; + ++MBBI) { + MachineInstr &MI = *MBBI; + if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) { + dbgs() << "LightWeightVerify for function: " + << MF.getFunction()->getName() << " (BAD STACK CHANGE)\n"; + DumpInstructionVerbose(MI); + DumpBasicBlockVerbose(MBB); + } + } + } + }); +} + +void ARMNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + // (1) Ensure there is room in the bundle for a data mask instruction + // (nop'ing to the next bundle if needed). + // (2) Do a data mask on SP after the instruction that updated SP. + MachineInstr &MI = *MBBI; + + // Use same predicate as current instruction. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_NOP_IF_AT_BUNDLE_END)); + + // Get to next instr. + MachineBasicBlock::iterator MBBINext = (++MBBI); + + BuildMI(MBB, MBBINext, MI.getDebugLoc(), + TII->get(ARM::SFI_DATA_MASK)) + .addReg(ARM::SP, RegState::Define) // modify SP (as dst) + .addReg(ARM::SP, RegState::Kill) // start with SP (as src) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) +} + +bool ARMNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + if (ARM_SFI::NeedSandboxStackChange(MI, TRI)) { + SandboxStackChange(MBB, MBBI); + Modified |= true; + } + } + return Modified; +} + +bool ARMNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + // Use same predicate as current instruction. 
+ unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + + if (IsReturn(MI)) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_RETURN)) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + + if (IsIndirectJump(MI)) { + unsigned Addr = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_INDIRECT_JMP)) + .addReg(Addr, RegState::Define) // Destination definition (as dst) + .addReg(Addr, RegState::Kill) // Destination read (as src) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + + if (IsDirectCall(MI)) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_CALL)) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + + if (IsIndirectCall(MI)) { + unsigned Addr = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::SFI_GUARD_INDIRECT_CALL)) + .addReg(Addr, RegState::Define) // Destination definition (as dst) + .addReg(Addr, RegState::Kill) // Destination read (as src) + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + Modified = true; + } + } + + return Modified; +} + +static bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 0... + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + + case ARM::VLDMDIA: + case ARM::VLDMSIA: + + case ARM::PLDi12: + case ARM::PLDWi12: + case ARM::PLIi12: + *AddrIdx = 0; + break; + // Instructions with base address register in position 1... + case ARM::LDMIA_UPD: // same reg at position 0 and position 1 + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + + case ARM::LDRSB: + case ARM::LDRH: + case ARM::LDRSH: + + case ARM::LDRi12: + case ARM::LDRrs: + case ARM::LDRBi12: + case ARM::LDRBrs: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VLDRS: + case ARM::VLDRD: + + case ARM::LDREX: + case ARM::LDREXB: + case ARM::LDREXH: + case ARM::LDREXD: + *AddrIdx = 1; + break; + + // Instructions with base address register in position 2... 
+ case ARM::LDR_PRE_REG: + case ARM::LDR_PRE_IMM: + case ARM::LDR_POST_REG: + case ARM::LDR_POST_IMM: + + case ARM::LDRB_PRE_REG: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + + case ARM::LDRD: + *AddrIdx = 2; + break; + + // + // NEON loads + // + + // VLD1 + case ARM::VLD1d8: + case ARM::VLD1d16: + case ARM::VLD1d32: + case ARM::VLD1d64: + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + *AddrIdx = 1; + break; + + case ARM::VLD1d8wb_fixed: + case ARM::VLD1d16wb_fixed: + case ARM::VLD1d32wb_fixed: + case ARM::VLD1d64wb_fixed: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1d8wb_register: + case ARM::VLD1d16wb_register: + case ARM::VLD1d32wb_register: + case ARM::VLD1d64wb_register: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: + *AddrIdx = 2; + break; + + // VLD1T + case ARM::VLD1d8T: + case ARM::VLD1d16T: + case ARM::VLD1d32T: + case ARM::VLD1d64T: + *AddrIdx = 1; + break; + + case ARM::VLD1d8Twb_fixed: + case ARM::VLD1d16Twb_fixed: + case ARM::VLD1d32Twb_fixed: + case ARM::VLD1d64Twb_fixed: + case ARM::VLD1d8Twb_register: + case ARM::VLD1d16Twb_register: + case ARM::VLD1d32Twb_register: + case ARM::VLD1d64Twb_register: + *AddrIdx = 2; + break; + + // VLD1Q + case ARM::VLD1d8Q: + case ARM::VLD1d16Q: + case ARM::VLD1d32Q: + case ARM::VLD1d64Q: + *AddrIdx = 1; + break; + + case ARM::VLD1d8Qwb_fixed: + case ARM::VLD1d16Qwb_fixed: + case ARM::VLD1d32Qwb_fixed: + case ARM::VLD1d64Qwb_fixed: + case ARM::VLD1d8Qwb_register: + case ARM::VLD1d16Qwb_register: + case ARM::VLD1d32Qwb_register: + case ARM::VLD1d64Qwb_register: + *AddrIdx = 2; + break; + + // VLD1LN + case ARM::VLD1LNd8: + case ARM::VLD1LNd16: + case ARM::VLD1LNd32: + case ARM::VLD1LNd8_UPD: + case ARM::VLD1LNd16_UPD: + case ARM::VLD1LNd32_UPD: + + // VLD1DUP + case ARM::VLD1DUPd8: + case ARM::VLD1DUPd16: + case ARM::VLD1DUPd32: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPd8wb_fixed: + case ARM::VLD1DUPd16wb_fixed: + case ARM::VLD1DUPd32wb_fixed: + case ARM::VLD1DUPq8wb_fixed: + case ARM::VLD1DUPq16wb_fixed: + case ARM::VLD1DUPq32wb_fixed: + case ARM::VLD1DUPd8wb_register: + case ARM::VLD1DUPd16wb_register: + case ARM::VLD1DUPd32wb_register: + case ARM::VLD1DUPq8wb_register: + case ARM::VLD1DUPq16wb_register: + case ARM::VLD1DUPq32wb_register: + + // VLD2 + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2b8: + case ARM::VLD2b16: + case ARM::VLD2b32: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + *AddrIdx = 1; + break; + + case ARM::VLD2d8wb_fixed: + case ARM::VLD2d16wb_fixed: + case ARM::VLD2d32wb_fixed: + case ARM::VLD2b8wb_fixed: + case ARM::VLD2b16wb_fixed: + case ARM::VLD2b32wb_fixed: + case ARM::VLD2q8wb_fixed: + case ARM::VLD2q16wb_fixed: + case ARM::VLD2q32wb_fixed: + case ARM::VLD2d8wb_register: + case ARM::VLD2d16wb_register: + case ARM::VLD2d32wb_register: + case ARM::VLD2b8wb_register: + case ARM::VLD2b16wb_register: + case ARM::VLD2b32wb_register: + case ARM::VLD2q8wb_register: + case ARM::VLD2q16wb_register: + case ARM::VLD2q32wb_register: + *AddrIdx = 2; + break; + + // VLD2LN + case ARM::VLD2LNd8: + case ARM::VLD2LNd16: + case ARM::VLD2LNd32: + case 
ARM::VLD2LNq16: + case ARM::VLD2LNq32: + *AddrIdx = 2; + break; + + case ARM::VLD2LNd8_UPD: + case ARM::VLD2LNd16_UPD: + case ARM::VLD2LNd32_UPD: + case ARM::VLD2LNq16_UPD: + case ARM::VLD2LNq32_UPD: + *AddrIdx = 3; + break; + + // VLD2DUP + case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16: + case ARM::VLD2DUPd32: + case ARM::VLD2DUPd8x2: + case ARM::VLD2DUPd16x2: + case ARM::VLD2DUPd32x2: + *AddrIdx = 1; + break; + + case ARM::VLD2DUPd8wb_fixed: + case ARM::VLD2DUPd16wb_fixed: + case ARM::VLD2DUPd32wb_fixed: + case ARM::VLD2DUPd8wb_register: + case ARM::VLD2DUPd16wb_register: + case ARM::VLD2DUPd32wb_register: + case ARM::VLD2DUPd8x2wb_fixed: + case ARM::VLD2DUPd16x2wb_fixed: + case ARM::VLD2DUPd32x2wb_fixed: + case ARM::VLD2DUPd8x2wb_register: + case ARM::VLD2DUPd16x2wb_register: + case ARM::VLD2DUPd32x2wb_register: + *AddrIdx = 2; + break; + + // VLD3 + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD3q8: + case ARM::VLD3q16: + case ARM::VLD3q32: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + + // VLD3LN + case ARM::VLD3LNd8: + case ARM::VLD3LNd16: + case ARM::VLD3LNd32: + case ARM::VLD3LNq16: + case ARM::VLD3LNq32: + *AddrIdx = 3; + break; + + case ARM::VLD3LNd8_UPD: + case ARM::VLD3LNd16_UPD: + case ARM::VLD3LNd32_UPD: + case ARM::VLD3LNq16_UPD: + case ARM::VLD3LNq32_UPD: + *AddrIdx = 4; + break; + + // VLD3DUP + case ARM::VLD3DUPd8: + case ARM::VLD3DUPd16: + case ARM::VLD3DUPd32: + case ARM::VLD3DUPq8: + case ARM::VLD3DUPq16: + case ARM::VLD3DUPq32: + *AddrIdx = 3; + break; + + case ARM::VLD3DUPd8_UPD: + case ARM::VLD3DUPd16_UPD: + case ARM::VLD3DUPd32_UPD: + case ARM::VLD3DUPq8_UPD: + case ARM::VLD3DUPq16_UPD: + case ARM::VLD3DUPq32_UPD: + *AddrIdx = 4; + break; + + // VLD4 + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD4q8: + case ARM::VLD4q16: + case ARM::VLD4q32: + *AddrIdx = 4; + break; + + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + *AddrIdx = 5; + break; + + // VLD4LN + case ARM::VLD4LNd8: + case ARM::VLD4LNd16: + case ARM::VLD4LNd32: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: + *AddrIdx = 4; + break; + + case ARM::VLD4LNd8_UPD: + case ARM::VLD4LNd16_UPD: + case ARM::VLD4LNd32_UPD: + case ARM::VLD4LNq16_UPD: + case ARM::VLD4LNq32_UPD: + *AddrIdx = 5; + break; + + case ARM::VLD4DUPd8: + case ARM::VLD4DUPd16: + case ARM::VLD4DUPd32: + case ARM::VLD4DUPq16: + case ARM::VLD4DUPq32: + *AddrIdx = 4; + break; + + case ARM::VLD4DUPd8_UPD: + case ARM::VLD4DUPd16_UPD: + case ARM::VLD4DUPd32_UPD: + case ARM::VLD4DUPq16_UPD: + case ARM::VLD4DUPq32_UPD: + *AddrIdx = 5; + break; + } + + if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) { + // The contents of SP do not require masking. + return false; + } + + return true; +} + +/* + * Sandboxes a memory reference instruction by inserting an appropriate mask + * or check operation before it. + */ +void ARMNaClRewritePass::SandboxMemory(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx, + bool IsLoad) { + unsigned Addr = MI.getOperand(AddrIdx).getReg(); + + if (!FlagNaClUseM23ArmAbi && Addr == ARM::R9) { + // R9-relative loads are no longer sandboxed. 
+ assert(IsLoad && "There should be no r9-relative stores"); + } else { + unsigned Opcode; + if (IsLoad && (MI.getOperand(0).getReg() == ARM::SP)) { + Opcode = ARM::SFI_GUARD_SP_LOAD; + } else { + Opcode = ARM::SFI_GUARD_LOADSTORE; + } + // Use same predicate as current instruction. + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + // Use the older BIC sandbox, which is universal, but incurs a stall. + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode)) + .addReg(Addr, RegState::Define) // Address definition (as dst). + .addReg(Addr, RegState::Kill) // Address read (as src). + .addImm((int64_t) Pred) // predicate condition + .addReg(PredReg); // predicate source register (CPSR) + + /* + * This pseudo-instruction is intended to generate something resembling the + * following, but with alignment enforced. + * TODO(cbiffle): move alignment into this function, use the code below. + * + * // bic<cc> Addr, Addr, #0xC0000000 + * BuildMI(MBB, MBBI, MI.getDebugLoc(), + * TII->get(ARM::BICri)) + * .addReg(Addr) // rD + * .addReg(Addr) // rN + * .addImm(0xC0000000) // imm + * .addImm((int64_t) Pred) // predicate condition + * .addReg(PredReg) // predicate source register (CPSR) + * .addReg(0); // flag output register (0 == no flags) + */ + } +} + +static bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 0... + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + + case ARM::VSTMDIA: + case ARM::VSTMSIA: + *AddrIdx = 0; + break; + + // Instructions with base address register in position 1... + case ARM::STMIA_UPD: // same reg at position 0 and position 1 + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + + case ARM::STRH: + case ARM::STRi12: + case ARM::STRrs: + case ARM::STRBi12: + case ARM::STRBrs: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + case ARM::VSTRS: + case ARM::VSTRD: + *AddrIdx = 1; + break; + + // + // NEON stores + // + + // VST1 + case ARM::VST1d8: + case ARM::VST1d16: + case ARM::VST1d32: + case ARM::VST1d64: + case ARM::VST1q8: + case ARM::VST1q16: + case ARM::VST1q32: + case ARM::VST1q64: + *AddrIdx = 0; + break; + + case ARM::VST1d8wb_fixed: + case ARM::VST1d16wb_fixed: + case ARM::VST1d32wb_fixed: + case ARM::VST1d64wb_fixed: + case ARM::VST1q8wb_fixed: + case ARM::VST1q16wb_fixed: + case ARM::VST1q32wb_fixed: + case ARM::VST1q64wb_fixed: + case ARM::VST1d8wb_register: + case ARM::VST1d16wb_register: + case ARM::VST1d32wb_register: + case ARM::VST1d64wb_register: + case ARM::VST1q8wb_register: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_register: + *AddrIdx = 1; + break; + + // VST1LN + case ARM::VST1LNd8: + case ARM::VST1LNd16: + case ARM::VST1LNd32: + *AddrIdx = 0; + break; + + case ARM::VST1LNd8_UPD: + case ARM::VST1LNd16_UPD: + case ARM::VST1LNd32_UPD: + *AddrIdx = 1; + break; + + // VST2 + case ARM::VST2d8: + case ARM::VST2d16: + case ARM::VST2d32: + case ARM::VST2q8: + case ARM::VST2q16: + case ARM::VST2q32: + *AddrIdx = 0; + break; + + case ARM::VST2d8wb_fixed: + case ARM::VST2d16wb_fixed: + case ARM::VST2d32wb_fixed: + case ARM::VST2q8wb_fixed: + case ARM::VST2q16wb_fixed: + case ARM::VST2q32wb_fixed: + case ARM::VST2d8wb_register: + case ARM::VST2d16wb_register: + case ARM::VST2d32wb_register: + case ARM::VST2q8wb_register: + case 
ARM::VST2q16wb_register: + case ARM::VST2q32wb_register: + *AddrIdx = 1; + break; + + // VST2LN + case ARM::VST2LNd8: + case ARM::VST2LNd16: + case ARM::VST2LNq16: + case ARM::VST2LNd32: + case ARM::VST2LNq32: + *AddrIdx = 0; + break; + + case ARM::VST2LNd8_UPD: + case ARM::VST2LNd16_UPD: + case ARM::VST2LNq16_UPD: + case ARM::VST2LNd32_UPD: + case ARM::VST2LNq32_UPD: + *AddrIdx = 1; + break; + + // VST3 + case ARM::VST3d8: + case ARM::VST3d16: + case ARM::VST3d32: + case ARM::VST3q8: + case ARM::VST3q16: + case ARM::VST3q32: + *AddrIdx = 0; + break; + + case ARM::VST3d8_UPD: + case ARM::VST3d16_UPD: + case ARM::VST3d32_UPD: + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + *AddrIdx = 1; + break; + + // VST3LN + case ARM::VST3LNd8: + case ARM::VST3LNd16: + case ARM::VST3LNq16: + case ARM::VST3LNd32: + case ARM::VST3LNq32: + *AddrIdx = 0; + break; + + case ARM::VST3LNd8_UPD: + case ARM::VST3LNd16_UPD: + case ARM::VST3LNq16_UPD: + case ARM::VST3LNd32_UPD: + case ARM::VST3LNq32_UPD: + *AddrIdx = 1; + break; + + // VST4 + case ARM::VST4d8: + case ARM::VST4d16: + case ARM::VST4d32: + case ARM::VST4q8: + case ARM::VST4q16: + case ARM::VST4q32: + *AddrIdx = 0; + break; + + case ARM::VST4d8_UPD: + case ARM::VST4d16_UPD: + case ARM::VST4d32_UPD: + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + *AddrIdx = 1; + break; + + // VST4LN + case ARM::VST4LNd8: + case ARM::VST4LNd16: + case ARM::VST4LNq16: + case ARM::VST4LNd32: + case ARM::VST4LNq32: + *AddrIdx = 0; + break; + + case ARM::VST4LNd8_UPD: + case ARM::VST4LNd16_UPD: + case ARM::VST4LNq16_UPD: + case ARM::VST4LNd32_UPD: + case ARM::VST4LNq32_UPD: + *AddrIdx = 1; + break; + + // Instructions with base address register in position 2... + case ARM::STR_PRE_REG: + case ARM::STR_PRE_IMM: + case ARM::STR_POST_REG: + case ARM::STR_POST_IMM: + + case ARM::STRB_PRE_REG: + case ARM::STRB_PRE_IMM: + case ARM::STRB_POST_REG: + case ARM::STRB_POST_IMM: + + case ARM::STRH_PRE: + case ARM::STRH_POST: + + + case ARM::STRD: + case ARM::STREX: + case ARM::STREXB: + case ARM::STREXH: + case ARM::STREXD: + *AddrIdx = 2; + break; + } + + if (MI.getOperand(*AddrIdx).getReg() == ARM::SP) { + // The contents of SP do not require masking. 
+ return false; + } + + return true; +} + +bool ARMNaClRewritePass::SandboxMemoryReferencesInBlock( + MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (FlagSfiLoad && IsDangerousLoad(MI, &AddrIdx)) { + SandboxMemory(MBB, MBBI, MI, AddrIdx, true); + Modified = true; + } + if (FlagSfiStore && IsDangerousStore(MI, &AddrIdx)) { + SandboxMemory(MBB, MBBI, MI, AddrIdx, false); + Modified = true; + } + } + return Modified; +} + +/**********************************************************************/ + +bool ARMNaClRewritePass::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + TRI = MF.getTarget().getRegisterInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + + if (MBB.hasAddressTaken()) { + //FIXME: use symbolic constant or get this value from some configuration + MBB.setAlignment(4); + Modified = true; + } + + if (FlagSfiLoad || FlagSfiStore) + Modified |= SandboxMemoryReferencesInBlock(MBB); + if (FlagSfiBranch) Modified |= SandboxBranchesInBlock(MBB); + if (FlagSfiStack) Modified |= SandboxStackChangesInBlock(MBB); + } + DEBUG(LightweightVerify(MF)); + return Modified; +} + +/// createARMNaClRewritePass - returns an instance of the NaClRewritePass. +FunctionPass *llvm::createARMNaClRewritePass() { + return new ARMNaClRewritePass(); +} diff --git a/lib/Target/ARM/ARMNaClRewritePass.h b/lib/Target/ARM/ARMNaClRewritePass.h new file mode 100644 index 0000000000..c8854a54fc --- /dev/null +++ b/lib/Target/ARM/ARMNaClRewritePass.h @@ -0,0 +1,36 @@ +//===-- ARMNaClRewritePass.h - NaCl Sandboxing Pass ------- --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_ARMNACLREWRITEPASS_H +#define TARGET_ARMNACLREWRITEPASS_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + extern cl::opt<bool> FlagSfiZeroMask; + extern cl::opt<bool> FlagSfiData; + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiStack; + extern cl::opt<bool> FlagSfiBranch; +} + +namespace ARM_SFI { + +bool IsStackChange(const llvm::MachineInstr &MI, + const llvm::TargetRegisterInfo *TRI); +bool IsSandboxedStackChange(const llvm::MachineInstr &MI); +bool NeedSandboxStackChange(const llvm::MachineInstr &MI, + const llvm::TargetRegisterInfo *TRI); + +} // namespace ARM_SFI + +#endif diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 41a7e0c2c8..0056562719 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -146,7 +146,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { // Use default for non AAPCS (or Darwin) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) + if (Subtarget->isTargetNaCl() || !Subtarget->isAAPCS_ABI() || Subtarget->isTargetDarwin()) // @LOCALMOD return SDValue(); const ARMTargetLowering &TLI = diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 8653c462f0..48fd903f80 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -34,6 +34,15 @@ ReserveR9("arm-reserve-r9", cl::Hidden, static cl::opt<bool> DarwinUseMOVT("arm-darwin-use-movt", cl::init(true), cl::Hidden); +// @LOCALMOD-START +// TODO: * JITing has not been tested at all +// * Thumb mode operation is also not clear: it seems jump tables +// for thumb are broken independent of this option +static cl::opt<bool> +NoInlineJumpTables("no-inline-jumptables", + cl::desc("Do not place jump tables inline in the code")); +// @LOCALMOD-END + static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); @@ -67,6 +76,7 @@ void ARMSubtarget::initializeEnvironment() { HasVFPv4 = false; HasNEON = false; UseNEONForSinglePrecisionFP = false; + UseInlineJumpTables = !NoInlineJumpTables; UseMulOps = UseFusedMulOps; SlowFPVMLx = false; HasVMLxForwarding = false; @@ -159,6 +169,18 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { SupportsTailCall = !getTargetTriple().isOSVersionLT(5, 0); } + // @LOCALMOD-BEGIN + // Advanced SIMD and Q registers are part of the NaCl ARM ABI. The ARM + // EABI specifies only an 8 byte alignment, which can result in poor + // performance for these 16 byte data types if they straddle cache lines, etc. + // Therefore, NaCl aligns stack frames 0mod16. + if (isTargetNaCl()) + stackAlignment = 16; + // NaCl uses MovT to avoid generating constant islands. 
+ if (isTargetNaCl() && !useConstPool()) + UseMovt = true; + // @LOCALMOD-END + if (!isThumb() || hasThumb2()) PostRAScheduler = true; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 038eb76ae1..bc32aece9a 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -23,6 +23,15 @@ #define GET_SUBTARGETINFO_HEADER #include "ARMGenSubtargetInfo.inc" +// @LOCALMOD-BEGIN +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiDisableCP; + extern cl::opt<bool> FlagNaClUseM23ArmAbi; +} +// @LOCALMOD-END + + namespace llvm { class GlobalValue; class StringRef; @@ -92,6 +101,11 @@ protected: /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; + // @LOCALMOD-START + /// UseInlineJumpTables - True if jump tables should be in-line in the code. + bool UseInlineJumpTables; + // @LOCALMOD-END + /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit /// imms (including global addresses). bool UseMovt; @@ -270,9 +284,8 @@ public: bool isTargetIOS() const { return TargetTriple.getOS() == Triple::IOS; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } - bool isTargetNaCl() const { - return TargetTriple.getOS() == Triple::NaCl; - } + bool isTargetNaCl() const { return TargetTriple.getOS() == Triple::NaCl; } + bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } @@ -290,6 +303,9 @@ public: bool useMovt() const { return UseMovt && hasV6T2Ops(); } bool supportsTailCall() const { return SupportsTailCall; } + // @LOCALMOD + bool useConstPool() const { return !FlagSfiDisableCP; } + bool allowsUnalignedMem() const { return AllowsUnalignedMem; } const std::string & getCPUString() const { return CPUString; } @@ -313,6 +329,8 @@ public: /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect /// symbol. bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; + + bool useInlineJumpTables() const {return UseInlineJumpTables;} // @LOCALMOD }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 42c7d2c437..ecd69726e6 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -20,6 +20,9 @@ #include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" +// @LOCALMOD-START +#include "llvm/Transforms/NaCl.h" +// @LOCALMOD-END #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -33,6 +36,13 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, cl::desc("Inhibit optimization of S->D register accesses on A15"), cl::init(false)); +// @LOCALMOD-START +namespace llvm { +cl::opt<bool> FlagSfiDisableCP("sfi-disable-cp", + cl::desc("disable arm constant island pools")); +} +// @LOCALMOD-END + extern "C" void LLVMInitializeARMTarget() { // Register the target. 
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget); @@ -139,6 +149,9 @@ public: virtual bool addPreRegAlloc(); virtual bool addPreSched2(); virtual bool addPreEmitPass(); +// @LOCALMOD-START + virtual void addIRPasses(); +// @LOCALMOD-END }; } // namespace @@ -147,8 +160,18 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } bool ARMPassConfig::addPreISel() { - if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge) + // @LOCALMOD-START + // We disable the GlobalMerge pass for PNaCl because it causes the + // PNaCl ABI checker to reject the program when the PNaCl translator + // is run in streaming mode. This is because GlobalMerge replaces + // functions' GlobalVariable references with ConstantExprs which the + // ABI verifier rejects. + // TODO(mseaborn): Make the ABI checks coexist with GlobalMerge to + // get back the performance benefits of GlobalMerge. + if (TM->getOptLevel() != CodeGenOpt::None && EnableGlobalMerge && + !getARMSubtarget().isTargetNaCl()) addPass(createGlobalMergePass(TM->getTargetLowering())); + // @LOCALMOD-END return false; } @@ -212,11 +235,36 @@ bool ARMPassConfig::addPreEmitPass() { addPass(&UnpackMachineBundlesID); } + // @LOCALMOD-START + // Note with FlagSfiDisableCP we effectively disable the + // ARMConstantIslandPass and rely on movt/movw to eliminate the need + // for constant islands + if (FlagSfiDisableCP) { + assert(getARMSubtarget().useMovt()); + } + // @LOCALMOD-END + addPass(createARMConstantIslandPass()); + // @LOCALMOD-START + // This pass does all the heavy sfi lifting. + if (getARMSubtarget().isTargetNaCl()) { + addPass(createARMNaClRewritePass()); + } + // @LOCALMOD-END + return true; } +// @LOCALMOD-START +void ARMPassConfig::addIRPasses() { + if (getARMSubtarget().isTargetNaCl()) { + addPass(createInsertDivideCheckPass()); + } + TargetPassConfig::addIRPasses(); +} +// @LOCALMOD-END + bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) { // Machine code emitter pass for ARM. 
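A note on the assert(getARMSubtarget().useMovt()) guard in the hunk above: when -sfi-disable-cp turns off the ARMConstantIslandPass, every 32-bit constant has to be materialized as a movw/movt pair instead of being loaded from a PC-relative literal pool, so the flag only makes sense on subtargets where useMovt() holds. A minimal standalone sketch of that split (illustrative only, not LLVM code; the register and the constant are made up):

    #include <cstdint>
    #include <cstdio>

    // How a 32-bit constant is rebuilt from the two 16-bit immediates that a
    // movw/movt pair carries; this is what removes the need for literal pools.
    int main() {
      uint32_t value = 0xCAFEBABEu;
      uint16_t movw_imm = static_cast<uint16_t>(value & 0xFFFFu); // movw: low half, zero-extends
      uint16_t movt_imm = static_cast<uint16_t>(value >> 16);     // movt: overwrites high half
      uint32_t rebuilt  = (static_cast<uint32_t>(movt_imm) << 16) | movw_imm;
      std::printf("movw r0, #0x%04X\n", static_cast<unsigned>(movw_imm));
      std::printf("movt r0, #0x%04X   ; r0 == 0x%08X\n",
                  static_cast<unsigned>(movt_imm), static_cast<unsigned>(rebuilt));
      return 0;
    }

This is presumably also why movw/movt support (useMovt() requires v6T2) is checked before relying on the constant-pool-free mode.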
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index d4caf5ca6e..51f79f24f8 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -28,6 +28,13 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" +// @LOCALMOD-START +#include "llvm/Support/CommandLine.h" +namespace llvm { + extern cl::opt<bool> FlagSfiDisableCP; +} +// @LOCALMOD-END + namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index dfdf6ab356..c954550bb8 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -10,6 +10,8 @@ #include "ARMTargetObjectFile.h" #include "ARMSubtarget.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" // @LOCALMOD +#include "llvm/CodeGen/MachineModuleInfo.h" // @LOCALMOD #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" @@ -30,7 +32,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(isAAPCS_ABI); - if (isAAPCS_ABI) { + // @LOCALMOD-BEGIN + if (isAAPCS_ABI && !TM.getSubtarget<ARMSubtarget>().isTargetNaCl()) { + // @LOCALMOD-END LSDASection = NULL; } @@ -46,7 +50,13 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const { assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only"); - + // @LOCALMOD-BEGIN + // FIXME: There has got to be a better way to get this info. + Triple T(MMI->getModule()->getTargetTriple()); + if (T.isOSNaCl()) + return TargetLoweringObjectFileELF::getTTypeGlobalReference(GV, Mang, + MMI, Encoding, Streamer); + // @LOCALMOD-END return MCSymbolRefExpr::Create(Mang->getSymbol(GV), MCSymbolRefExpr::VK_ARM_TARGET2, getContext()); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index b832508a08..8bf81acc93 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,8 @@ add_llvm_target(ARMCodeGen ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMNaClHeaders.cpp + ARMNaClRewritePass.cpp ARMRegisterInfo.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 3bcd083a35..ff11f37c18 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -223,6 +223,71 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } + // @LOCALMOD-BEGIN + // TODO(pdox): Kill this code once we switch to MC object emission + const char *SFIInst = NULL; + unsigned SFIEmitDest = ~0; + unsigned SFIEmitPred = ~0; + switch (Opcode) { + case ARM::SFI_NOP_IF_AT_BUNDLE_END : + SFIInst = "sfi_nop_if_at_bundle_end"; + SFIEmitDest = ~0; + SFIEmitPred = ~0; + break; + case ARM::SFI_GUARD_LOADSTORE : + SFIInst = "sfi_load_store_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_INDIRECT_CALL: + SFIInst = "sfi_indirect_call_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_INDIRECT_JMP : + SFIInst = "sfi_indirect_jump_preamble"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_DATA_MASK : + SFIInst = "sfi_data_mask"; + SFIEmitDest = 0; + SFIEmitPred = 2; + break; + case ARM::SFI_GUARD_LOADSTORE_TST: + SFIInst = 
"sfi_cload_store_preamble"; + SFIEmitDest = 0; + SFIEmitPred = ~0; + break; + case ARM::SFI_GUARD_CALL : + SFIInst = "sfi_call_preamble"; + SFIEmitDest = ~0; + SFIEmitPred = 0; + break; + case ARM::SFI_GUARD_RETURN : + SFIInst = "sfi_return_preamble lr,"; + SFIEmitDest = ~0; + SFIEmitPred = 0; + break; + } + if (SFIInst) { + O << '\t' << SFIInst; + if (SFIEmitDest != (unsigned)~0) { + O << ' '; + printOperand(MI, SFIEmitDest, O); + } + if (SFIEmitDest != (unsigned)~0 && SFIEmitPred != (unsigned)~0) { + O << ','; + } + if (SFIEmitPred != (unsigned)~0) { + O << ' '; + printPredicateOperand(MI, SFIEmitPred, O); + } + O << '\n'; + return; + } + // @LOCALMOD-END + if (Opcode == ARM::tLDMIA) { bool Writeback = true; unsigned BaseReg = MI->getOperand(0).getReg(); diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index fd4b3a33de..6ba3966eaa 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -31,5 +31,6 @@ has_jit = 1 type = Library name = ARMCodeGen parent = ARM -required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target +; @LOCALMOD +required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target NaClTransforms add_to_library_groups = ARM diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index e66e985678..2d8abc5da2 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/MC/MCAsmBackend.h" +#include "MCTargetDesc/ARMMCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDirectives.h" @@ -637,6 +638,18 @@ public: } }; +// @LOCALMOD-BEGIN +class NaClARMAsmBackend : public ELFARMAsmBackend { +public: + NaClARMAsmBackend(const Target &T, const StringRef TT, uint8_t _OSABI) + : ELFARMAsmBackend(T, TT, _OSABI) { } + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + return CustomExpandInstNaClARM(Inst, Out); + } +}; +// @LOCALMOD-END + // FIXME: This should be in a separate file. 
class DarwinARMAsmBackend : public ARMAsmBackend { public: @@ -684,5 +697,9 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, StringRef TT, StringRef assert(0 && "Windows not supported on ARM"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); + // @LOCALMOD-END + if (TheTriple.isOSNaCl()) + return new NaClARMAsmBackend(T, TT, OSABI); + // @LOCALMOD-END return new ELFARMAsmBackend(T, TT, OSABI); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 6c3d247668..b7dc450600 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -475,5 +475,3 @@ namespace llvm { } } - - diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index f0b289c6f3..059ee99f1c 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -28,6 +28,12 @@ namespace llvm { virtual void anchor(); public: explicit ARMELFMCAsmInfo(); + // @LOCALMOD-BEGIN + // Exceptions handling + void setExceptionsType(ExceptionHandling::ExceptionsType ExType) { + ExceptionsType = ExType; + } + // @LOCALMOD-END }; } // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp new file mode 100644 index 0000000000..cd25c865d9 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.cpp @@ -0,0 +1,327 @@ +//=== ARMMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-mc-nacl" + +#include "MCTargetDesc/ARMBaseInfo.h" +#include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +namespace llvm { + cl::opt<bool> FlagSfiZeroMask("sfi-zero-mask"); +} + +/// Two helper functions for emitting the actual guard instructions + +static void EmitBICMask(MCStreamer &Out, + unsigned Addr, int64_t Pred, unsigned Mask) { + // bic\Pred \Addr, \Addr, #Mask + MCInst BICInst; + BICInst.setOpcode(ARM::BICri); + BICInst.addOperand(MCOperand::CreateReg(Addr)); // rD + BICInst.addOperand(MCOperand::CreateReg(Addr)); // rS + if (FlagSfiZeroMask) { + BICInst.addOperand(MCOperand::CreateImm(0)); // imm + } else { + BICInst.addOperand(MCOperand::CreateImm(Mask)); // imm + } + BICInst.addOperand(MCOperand::CreateImm(Pred)); // predicate + BICInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); // CPSR + BICInst.addOperand(MCOperand::CreateReg(0)); // flag out + Out.EmitInstruction(BICInst); +} + +static void EmitTST(MCStreamer &Out, unsigned Reg) { + // tst \reg, #\MASK typically 0xc0000000 + const unsigned Mask = 0xC0000000; + MCInst TSTInst; + TSTInst.setOpcode(ARM::TSTri); + TSTInst.addOperand(MCOperand::CreateReg(Reg)); // rS + if (FlagSfiZeroMask) { + TSTInst.addOperand(MCOperand::CreateImm(0)); // imm + } else { + TSTInst.addOperand(MCOperand::CreateImm(Mask)); // imm + } + TSTInst.addOperand(MCOperand::CreateImm((int64_t)ARMCC::AL)); // Always + TSTInst.addOperand(MCOperand::CreateImm(0)); // flag out + 
Out.EmitInstruction(TSTInst); +} + + +// This is ONLY used for sandboxing stack changes. +// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that +// it must ensure that the two instructions are in the same bundle. +// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always +// emitted in conjunction with a SFI_DATA_MASK +// +static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) { + assert(I == 3 && + (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) && + (ARM::SFI_DATA_MASK == Saved[2].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + + unsigned Addr = Saved[2].getOperand(0).getReg(); + int64_t Pred = Saved[2].getOperand(2).getImm(); + assert((ARM::SP == Addr) && "Unexpected register at stack guard"); + + Out.EmitBundleLock(false); + Out.EmitInstruction(Saved[1]); + EmitBICMask(Out, Addr, Pred, 0xC0000000); + Out.EmitBundleUnlock(); +} + +static void EmitDirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_call_preamble cond= + // sfi_nops_to_force_slot3 + assert(I == 2 && (ARM::SFI_GUARD_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + Out.EmitBundleLock(true); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_indirect_call_preamble link cond= + // sfi_nops_to_force_slot2 + // sfi_code_mask \link \cond + assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + int64_t Pred = Saved[0].getOperand(2).getImm(); + Out.EmitBundleLock(true); + EmitBICMask(Out, Reg, Pred, 0xC000000F); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_indirect_jump_preamble link cond= + // sfi_nop_if_at_bundle_end + // sfi_code_mask \link \cond + assert(I == 2 && (ARM::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + int64_t Pred = Saved[0].getOperand(2).getImm(); + + Out.EmitBundleLock(false); + EmitBICMask(Out, Reg, Pred, 0xC000000F); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_return_preamble reg cond= + // sfi_nop_if_at_bundle_end + // sfi_code_mask \reg \cond + assert(I == 2 && (ARM::SFI_GUARD_RETURN == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); + int64_t Pred = Saved[0].getOperand(0).getImm(); + + Out.EmitBundleLock(false); + EmitBICMask(Out, ARM::LR, Pred, 0xC000000F); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_store_preamble reg cond ----> + // sfi_nop_if_at_bundle_end + // sfi_data_mask \reg, \cond + assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + int64_t Pred = Saved[0].getOperand(2).getImm(); + + Out.EmitBundleLock(false); + EmitBICMask(Out, Reg, Pred, 0xC0000000); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardLoadOrStoreTst(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_cstore_preamble reg --> + // sfi_nop_if_at_bundle_end + // sfi_data_tst 
\reg + assert(I == 2 && (ARM::SFI_GUARD_LOADSTORE_TST == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + + Out.EmitBundleLock(false); + EmitTST(Out, Reg); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +// This is ONLY used for loads into the stack pointer. +static void EmitGuardSpLoad(int I, MCInst Saved[], MCStreamer &Out) { + assert(I == 4 && + (ARM::SFI_GUARD_SP_LOAD == Saved[0].getOpcode()) && + (ARM::SFI_NOP_IF_AT_BUNDLE_END == Saved[1].getOpcode()) && + (ARM::SFI_DATA_MASK == Saved[3].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + + unsigned AddrReg = Saved[0].getOperand(0).getReg(); + unsigned SpReg = Saved[3].getOperand(0).getReg(); + int64_t Pred = Saved[3].getOperand(2).getImm(); + assert((ARM::SP == SpReg) && "Unexpected register at stack guard"); + + Out.EmitBundleLock(false); + EmitBICMask(Out, AddrReg, Pred, 0xC0000000); + Out.EmitInstruction(Saved[2]); + EmitBICMask(Out, SpReg, Pred, 0xC0000000); + Out.EmitBundleUnlock(); +} + +namespace llvm { + +// CustomExpandInstNaClARM - +// If Inst is a NaCl pseudo instruction, emits the substitute +// expansion to the MCStreamer and returns true. +// Otherwise, returns false. +// +// NOTE: Each time this function calls Out.EmitInstruction(), it will be +// called again recursively to rewrite the new instruction being emitted. +// Care must be taken to ensure that this does not result in an infinite +// loop. Also, global state must be managed carefully so that it is +// consistent during recursive calls. +bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out) { + // Logic: + // This is somewhat convoluted, but in the current model, the SFI + // guard pseudo instructions occur PRIOR to the actual instruction. + // So, the bundling/alignment operation has to refer to the FOLLOWING + // instructions. + // + // When a SFI pseudo is detected, it is saved. Then, the saved SFI + // pseudo and the very next instructions (their amount depending on the kind + // of the SFI pseudo) are used as arguments to the Emit*() functions in + // this file. + // + // Some static data is used to preserve state accross calls (TODO: can this + // be lifted into a proper state object?) + // + // Saved: the saved instructions (starting with the SFI_ pseudo). + // SavedCount: the amount of saved instructions required for the SFI pseudo + // that's being expanded. + // I: the index of the currently saved instruction - used to track + // where in Saved to insert the instruction and how many more + // remain. + // + const int MaxSaved = 4; + static MCInst Saved[MaxSaved]; + static int SaveCount = 0; + static int I = 0; + + // If we are emitting to .s, just emit all pseudo-instructions directly. + if (Out.hasRawTextSupport()) { + return false; + } + + // Protect against recursive execution. If RecurseGuard == true, it means + // we're already in the process of expanding a custom instruction, and we + // don't need to run recursively on anything generated by such an expansion. + static bool RecurseGuard = false; + if (RecurseGuard) + return false; + + DEBUG(dbgs() << "CustomExpandInstNaClARM("; Inst.dump(); dbgs() << ")\n"); + + if ((I == 0) && (SaveCount == 0)) { + // Base state: no SFI guard identified yet and no saving started. 
+ switch (Inst.getOpcode()) { + default: + // We don't handle non-SFI guards here + return false; + case ARM::SFI_NOP_IF_AT_BUNDLE_END: + // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of + // a stack guard in conjunction with a SFI_DATA_MASK. + SaveCount = 3; + break; + case ARM::SFI_DATA_MASK: + assert(0 && + "SFI_DATA_MASK found without preceding SFI_NOP_IF_AT_BUNDLE_END"); + return false; + case ARM::SFI_GUARD_CALL: + case ARM::SFI_GUARD_INDIRECT_CALL: + case ARM::SFI_GUARD_INDIRECT_JMP: + case ARM::SFI_GUARD_RETURN: + case ARM::SFI_GUARD_LOADSTORE: + case ARM::SFI_GUARD_LOADSTORE_TST: + SaveCount = 2; + break; + case ARM::SFI_GUARD_SP_LOAD: + SaveCount = 4; + break; + } + } + + // We're in "saving instructions" state + if (I < SaveCount) { + // This instruction has to be saved + assert(I < MaxSaved && "Trying to save too many instructions"); + Saved[I++] = Inst; + if (I < SaveCount) + return true; + } + + // We're in "saved enough instructions, time to emit" state + assert(I == SaveCount && SaveCount > 0 && "Bookeeping Error"); + + // When calling Emit* functions, do that with RecurseGuard set (the comment + // at the beginning of this function explains why) + RecurseGuard = true; + switch (Saved[0].getOpcode()) { + default: + break; + case ARM::SFI_NOP_IF_AT_BUNDLE_END: + EmitDataMask(I, Saved, Out); + break; + case ARM::SFI_DATA_MASK: + assert(0 && "SFI_DATA_MASK can't start a SFI sequence"); + break; + case ARM::SFI_GUARD_CALL: + EmitDirectGuardCall(I, Saved, Out); + break; + case ARM::SFI_GUARD_INDIRECT_CALL: + EmitIndirectGuardCall(I, Saved, Out); + break; + case ARM::SFI_GUARD_INDIRECT_JMP: + EmitIndirectGuardJmp(I, Saved, Out); + break; + case ARM::SFI_GUARD_RETURN: + EmitGuardReturn(I, Saved, Out); + break; + case ARM::SFI_GUARD_LOADSTORE: + EmitGuardLoadOrStore(I, Saved, Out); + break; + case ARM::SFI_GUARD_LOADSTORE_TST: + EmitGuardLoadOrStoreTst(I, Saved, Out); + break; + case ARM::SFI_GUARD_SP_LOAD: + EmitGuardSpLoad(I, Saved, Out); + break; + } + assert(RecurseGuard && "Illegal Depth"); + RecurseGuard = false; + + // We're done expanding a SFI guard. Reset state vars. + SaveCount = 0; + I = 0; + return true; +} + +} // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h new file mode 100644 index 0000000000..de7ed50662 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMMCNaCl.h @@ -0,0 +1,19 @@ +//===-- ARMMCNaCl.h - Prototype for CustomExpandInstNaClARM ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef ARMMCNACL_H +#define ARMMCNACL_H + +namespace llvm { + class MCInst; + class MCStreamer; + bool CustomExpandInstNaClARM(const MCInst &Inst, MCStreamer &Out); +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index f09fb5a94f..3d6e99f664 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMMCTargetDesc.h" #include "ARMBaseInfo.h" #include "ARMELFStreamer.h" #include "ARMMCAsmInfo.h" @@ -165,7 +166,18 @@ static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) { if (TheTriple.isOSDarwin()) return new ARMMCAsmInfoDarwin(); - return new ARMELFMCAsmInfo(); + // @LOCALMOD-BEGIN + ARMELFMCAsmInfo *MAI = new ARMELFMCAsmInfo(); + if (TheTriple.isOSNaCl()) { + // NativeClient uses Dwarf exception handling + MAI->setExceptionsType(ExceptionHandling::DwarfCFI); + // Initial state of the frame ARM:SP points to cfa + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(ARM::SP, 0); + MAI->addInitialFrameState(0, Dst, Src); + } + return MAI; + // @LOCALMOD-END } static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index a7ac5ca061..b4a9383f78 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMARMDesc ARMMCAsmInfo.cpp ARMMCCodeEmitter.cpp ARMMCExpr.cpp + ARMMCNaCl.cpp # LOCALMOD ARMMCTargetDesc.cpp ARMMachObjectWriter.cpp ARMELFObjectWriter.cpp diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 78a9f70c66..5b22c53453 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -34,6 +34,10 @@ add_llvm_target(MipsCodeGen MipsMachineFunction.cpp MipsModuleISelDAGToDAG.cpp MipsOs16.cpp +# LOCALMOD-BEGIN + MipsNaClHeaders.cpp + MipsNaClRewritePass.cpp +# LOCALMOD-END MipsRegisterInfo.cpp MipsSEFrameLowering.cpp MipsSEInstrInfo.cpp diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index 4212c94a55..06b32f7df4 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -7,6 +7,9 @@ add_llvm_library(LLVMMipsDesc MipsELFObjectWriter.cpp MipsReginfo.cpp MipsELFStreamer.cpp +# LOCALMOD-BEGIN + MipsMCNaCl.cpp +# LOCALMOD-END ) add_dependencies(LLVMMipsDesc MipsCommonTableGen) diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 0b13607a57..50f0893fce 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -14,6 +14,7 @@ #include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" +#include "MCTargetDesc/MipsMCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" @@ -250,11 +251,27 @@ public: } }; // class MipsAsmBackend +// @LOCALMOD-BEGIN +class NaClMipsAsmBackend : public MipsAsmBackend { +public: + NaClMipsAsmBackend(const Target &T, bool _is64Bit) + : MipsAsmBackend(T, Triple::NaCl, /* IsLittle */ true, _is64Bit) {} + + bool CustomExpandInst(const MCInst &Inst, MCStreamer 
&Out) const { + return CustomExpandInstNaClMips(Inst, Out); + } +}; // class NaClMipsAsmBackend +// @LOCALMOD-END + } // namespace // MCAsmBackend MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T, StringRef TT, StringRef CPU) { + // @LOCALMOD-BEGIN + if (Triple(TT).isOSNaCl()) + return new NaClMipsAsmBackend(T, /*Is64Bit*/false); + // @LOCALMOD-END return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/true, /*Is64Bit*/false); } @@ -267,6 +284,10 @@ MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T, StringRef TT, MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T, StringRef TT, StringRef CPU) { + // @LOCALMOD-BEGIN + if (Triple(TT).isOSNaCl()) + return new NaClMipsAsmBackend(T, /*Is64Bit*/true); + // @LOCALMOD-END return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/true, /*Is64Bit*/true); } @@ -276,4 +297,3 @@ MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T, StringRef TT, return new MipsAsmBackend(T, Triple(TT).getOS(), /*IsLittle*/false, /*Is64Bit*/true); } - diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 6471b51583..8c262c39cd 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -60,6 +60,7 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI, MipsELFObjectWriter::~MipsELFObjectWriter() {} + const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm, const MCValue &Target, const MCFragment &F, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp new file mode 100644 index 0000000000..fe0cabc923 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.cpp @@ -0,0 +1,259 @@ +//=== MipsMCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "mips-mc-nacl" + +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +/// Two helper functions for emitting the actual guard instructions + +static void EmitMask(MCStreamer &Out, + unsigned Addr, unsigned Mask) { + // and \Addr, \Addr, \Mask + MCInst MaskInst; + MaskInst.setOpcode(Mips::AND); + MaskInst.addOperand(MCOperand::CreateReg(Addr)); + MaskInst.addOperand(MCOperand::CreateReg(Addr)); + MaskInst.addOperand(MCOperand::CreateReg(Mask)); + Out.EmitInstruction(MaskInst); +} + +// This is ONLY used for sandboxing stack changes. +// The reason why SFI_NOP_IF_AT_BUNDLE_END gets handled here is that +// it must ensure that the two instructions are in the same bundle. 
+// It just so happens that the SFI_NOP_IF_AT_BUNDLE_END is always +// emitted in conjunction with a SFI_DATA_MASK +// +static void EmitDataMask(int I, MCInst Saved[], MCStreamer &Out) { + assert(I == 3 && + (Mips::SFI_NOP_IF_AT_BUNDLE_END == Saved[0].getOpcode()) && + (Mips::SFI_DATA_MASK == Saved[2].getOpcode()) && + "Unexpected SFI Pseudo while lowering"); + + unsigned Addr = Saved[2].getOperand(0).getReg(); + unsigned Mask = Saved[2].getOperand(2).getReg(); + assert((Mips::SP == Addr) && "Unexpected register at stack guard"); + + Out.EmitBundleLock(false); + Out.EmitInstruction(Saved[1]); + EmitMask(Out, Addr, Mask); + Out.EmitBundleUnlock(); +} + +static void EmitDirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_call_preamble ---> + // sfi_nops_to_force_slot2 + assert(I == 3 && (Mips::SFI_GUARD_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_CALL"); + Out.EmitBundleLock(true); + Out.EmitInstruction(Saved[1]); + Out.EmitInstruction(Saved[2]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardCall(int I, MCInst Saved[], + MCStreamer &Out) { + // sfi_indirect_call_preamble link ---> + // sfi_nops_to_force_slot1 + // sfi_code_mask \link \link \maskreg + assert(I == 3 && (Mips::SFI_GUARD_INDIRECT_CALL == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_CALL"); + + unsigned Addr = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(true); + EmitMask(Out, Addr, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitInstruction(Saved[2]); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectGuardJmp(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_indirect_jump_preamble link ---> + // sfi_nop_if_at_bundle_end + // sfi_code_mask \link \link \maskreg + assert(I == 2 && (Mips::SFI_GUARD_INDIRECT_JMP == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_INDIRECT_JMP"); + unsigned Addr = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(false); + EmitMask(Out, Addr, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardReturn(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_return_preamble reg ---> + // sfi_nop_if_at_bundle_end + // sfi_code_mask \reg \reg \maskreg + assert(I == 2 && (Mips::SFI_GUARD_RETURN == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_RETURN"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(false); + EmitMask(Out, Reg, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +static void EmitGuardLoadOrStore(int I, MCInst Saved[], MCStreamer &Out) { + // sfi_load_store_preamble reg ---> + // sfi_nop_if_at_bundle_end + // sfi_data_mask \reg \reg \maskreg + assert(I == 2 && (Mips::SFI_GUARD_LOADSTORE == Saved[0].getOpcode()) && + "Unexpected SFI Pseudo while lowering SFI_GUARD_LOADSTORE"); + unsigned Reg = Saved[0].getOperand(0).getReg(); + unsigned Mask = Saved[0].getOperand(2).getReg(); + + Out.EmitBundleLock(false); + EmitMask(Out, Reg, Mask); + Out.EmitInstruction(Saved[1]); + Out.EmitBundleUnlock(); +} + +namespace llvm { +// CustomExpandInstNaClMips - +// If Inst is a NaCl pseudo instruction, emits the substitute +// expansion to the MCStreamer and returns true. +// Otherwise, returns false. 
+// +// NOTE: Each time this function calls Out.EmitInstruction(), it will be +// called again recursively to rewrite the new instruction being emitted. +// Care must be taken to ensure that this does not result in an infinite +// loop. Also, global state must be managed carefully so that it is +// consistent during recursive calls. +// +// We need global state to keep track of the explicit prefix (PREFIX_*) +// instructions. Unfortunately, the assembly parser prefers to generate +// these instead of combined instructions. At this time, having only +// one explicit prefix is supported. + + +bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out) { + const int MaxSaved = 4; + static MCInst Saved[MaxSaved]; + static int SaveCount = 0; + static int I = 0; + // This routine only executes if RecurseGuard == 0 + static bool RecurseGuard = false; + + // If we are emitting to .s, just emit all pseudo-instructions directly. + if (Out.hasRawTextSupport()) { + return false; + } + + //No recursive calls allowed; + if (RecurseGuard) return false; + + unsigned Opc = Inst.getOpcode(); + + DEBUG(dbgs() << "CustomExpandInstNaClMips("; Inst.dump(); dbgs() << ")\n"); + + // Note: SFI_NOP_IF_AT_BUNDLE_END is only emitted directly as part of + // a stack guard in conjunction with a SFI_DATA_MASK + + // Logic: + // This is somewhat convoluted, but in the current model, the SFI + // guard pseudo instructions occur PRIOR to the actual instruction. + // So, the bundling/alignment operation has to refer to the FOLLOWING + // one or two instructions. + // + // When a SFI_* pseudo is detected, it is saved. Then, the saved SFI_* + // pseudo and the very next one or two instructions are used as arguments to + // the Emit*() functions in this file. This is the reason why we have a + // doublely nested switch here. First, to save the SFI_* pseudo, then to + // emit it and the next instruction + + // By default, we only need to save two or three instructions + + if ((I == 0) && (SaveCount == 0)) { + // Base State, no saved instructions. + // If the current instruction is a SFI instruction, set the SaveCount + // and fall through. + switch (Opc) { + default: + SaveCount = 0; // Nothing to do. + return false; // Handle this Inst elsewhere. + case Mips::SFI_NOP_IF_AT_BUNDLE_END: + case Mips::SFI_GUARD_CALL: + case Mips::SFI_GUARD_INDIRECT_CALL: + SaveCount = 3; + break; + case Mips::SFI_DATA_MASK: + SaveCount = 0; // Do nothing. + break; + case Mips::SFI_GUARD_INDIRECT_JMP: + case Mips::SFI_GUARD_RETURN: + case Mips::SFI_GUARD_LOADSTORE: + SaveCount = 2; + break; + } + } + + if (I < SaveCount) { + // Othewise, save the current Inst and return + Saved[I++] = Inst; + if (I < SaveCount) + return true; + // Else fall through to next stat + } + + if (SaveCount > 0) { + assert(I == SaveCount && "Bookeeping Error"); + SaveCount = 0; // Reset for next iteration + // The following calls may call Out.EmitInstruction() + // which must not again call CustomExpandInst ... 
+ // So set RecurseGuard = 1; + RecurseGuard = true; + + switch (Saved[0].getOpcode()) { + default: /* No action required */ break; + case Mips::SFI_NOP_IF_AT_BUNDLE_END: + EmitDataMask(I, Saved, Out); + break; + case Mips::SFI_DATA_MASK: + assert(0 && "Unexpected NOP_IF_AT_BUNDLE_END as a Saved Inst"); + break; + case Mips::SFI_GUARD_CALL: + EmitDirectGuardCall(I, Saved, Out); + break; + case Mips::SFI_GUARD_INDIRECT_CALL: + EmitIndirectGuardCall(I, Saved, Out); + break; + case Mips::SFI_GUARD_INDIRECT_JMP: + EmitIndirectGuardJmp(I, Saved, Out); + break; + case Mips::SFI_GUARD_RETURN: + EmitGuardReturn(I, Saved, Out); + break; + case Mips::SFI_GUARD_LOADSTORE: + EmitGuardLoadOrStore(I, Saved, Out); + break; + } + I = 0; // Reset I for next. + assert(RecurseGuard && "Illegal Depth"); + RecurseGuard = false; + return true; + } + return false; +} + +} // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h new file mode 100644 index 0000000000..c90502ec33 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h @@ -0,0 +1,19 @@ +//===-- MipsMCNaCl.h - Prototype for CustomExpandInstNaClMips ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSMCNACL_H +#define MIPSMCNACL_H + +namespace llvm { + class MCInst; + class MCStreamer; + bool CustomExpandInstNaClMips(const MCInst &Inst, MCStreamer &Out); +} + +#endif diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index 8c65bb4020..248354de25 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -18,6 +18,16 @@ #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/Target/TargetMachine.h" +/* @LOCALMOD-START */ +namespace llvm { + +namespace Mips { + extern unsigned LoadStoreStackMaskReg; + extern unsigned IndirectBranchMaskReg; +} +} // End llvm namespace +/* @LOCALMOD-END */ + namespace llvm { class MipsTargetMachine; class FunctionPass; @@ -29,6 +39,10 @@ namespace llvm { JITCodeEmitter &JCE); FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm); + // @LOCALMOD-START + FunctionPass *createMipsNaClRewritePass(); + // @LOCALMOD-END + } // end namespace llvm; #endif diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 6e4feda4f5..07181e5f5d 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -34,6 +34,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCNaCl.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" @@ -79,15 +80,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(OutStreamer, &*I)) continue; - // The inMips16Mode() test is not permanent. - // Some instructions are marked as pseudo right now which - // would make the test fail for the wrong reason but - // that will be fixed soon. We need this here because we are - // removing another test for this situation downstream in the - // callchain. - // - if (I->isPseudo() && !Subtarget->inMips16Mode()) - llvm_unreachable("Pseudo opcode found in EmitInstruction()"); + // @LOCALMOD: the I->isPseudo() assertion here has been removed because + // we may have SFI pseudos in I. 
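Stepping back from this hunk for a moment: CustomExpandInstNaClARM and CustomExpandInstNaClMips (added above) share the same buffering idea, and the static state can be easy to lose track of. The guard pseudo arrives first, the expander saves it together with the following instruction(s) (how many depends on the pseudo, via SaveCount), and only then emits the whole group inside one bundle lock, with RecurseGuard suppressing re-expansion of the instructions it emits itself. A stripped-down, self-contained model of that state machine (the opcode names, the Expander type, and the fixed group size of two are invented for illustration; the real code varies the count per pseudo):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    enum Opcode { GUARD_CALL, GUARD_LOADSTORE, ADD, CALL, LOAD };

    struct Expander {
      std::vector<Opcode> saved;
      std::size_t want = 0;     // how many instructions the pending guard needs
      bool emitting = false;    // mirrors the RecurseGuard flag

      // Returns true if the instruction was consumed (saved or expanded here).
      bool feed(Opcode op, std::vector<Opcode> &out) {
        if (emitting) return false;                 // no recursive expansion
        if (want == 0) {
          if (op != GUARD_CALL && op != GUARD_LOADSTORE)
            return false;                           // ordinary instruction: not ours
          want = 2;                                 // guard + one guarded instruction
        }
        saved.push_back(op);
        if (saved.size() < want) return true;       // still collecting
        emitting = true;                            // emit the bundle-locked group
        out.push_back(saved[0]);                    // expanded guard (mask / nops)
        out.push_back(saved[1]);                    // the guarded instruction itself
        emitting = false;
        saved.clear();
        want = 0;
        return true;
      }
    };

    int main() {
      Expander e;
      std::vector<Opcode> emitted;
      assert(!e.feed(ADD, emitted));          // ordinary instruction passes through
      assert(e.feed(GUARD_CALL, emitted));    // guard saved, nothing emitted yet
      assert(emitted.empty());
      assert(e.feed(CALL, emitted));          // group complete, emitted together
      assert(emitted.size() == 2);
      return 0;
    }
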
MCInst TmpInst0; MCInstLowering.Lower(I, TmpInst0); @@ -227,6 +221,16 @@ const char *MipsAsmPrinter::getCurrentABIString() const { } void MipsAsmPrinter::EmitFunctionEntryLabel() { + // @LOCALMOD-START + // make sure function entry is aligned. We use XmagicX as our basis + // for alignment decisions (c.f. assembler sfi macros). + if (Subtarget->isTargetNaCl()) { + EmitAlignment(std::max(MF->getAlignment(), 4u)); + if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(StringRef("\t.set XmagicX, .\n")); + } + } + // @LOCALMOD-END if (OutStreamer.hasRawTextSupport()) { if (Subtarget->inMips16Mode()) OutStreamer.EmitRawText(StringRef("\t.set\tmips16")); @@ -554,6 +558,10 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } +// @LOCALMOD-START +extern void EmitMipsSFIHeaders(raw_ostream &O); +// @LOCALMOD-END + void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. @@ -576,7 +584,38 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { if (OutStreamer.hasRawTextSupport()) OutStreamer.EmitRawText(StringRef("\t.previous")); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl()) { + if (OutStreamer.hasRawTextSupport()) { + std::string str; + raw_string_ostream OS(str); + EmitMipsSFIHeaders(OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + initializeNaClMCStreamer(OutStreamer, OutContext, + Triple(Subtarget->getTargetTriple())); + } + // @LOCALMOD-END +} + +// @LOCALMOD-START +unsigned MipsAsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 4; + } + } + return 0; } +// @LOCALMOD-END void MipsAsmPrinter::EmitEndOfAsmFile(Module &M) { diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index dbdaf266b7..df8b307955 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -83,6 +83,10 @@ public: void EmitEndOfAsmFile(Module &M); virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); + + // @LOCALMOD-START + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const; + // @LOCALMOD-END }; } diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 462def76cc..3f2ceabcd5 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -181,8 +181,13 @@ def CC_Mips_FastCC : CallingConv<[ // Integer arguments are passed in integer registers. All scratch registers, // except for AT, V0 and T9, are available to be used as argument registers. - CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, - T7, T8, V1]>>, + // @LOCALMOD-START + CCIfType<[i32], CCIfSubtarget<"isNotTargetNaCl()", + CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, V1]>>>, + // T6, T7 and T8 are reserved in NaCl and not available as argument registers. 
+ CCIfType<[i32], CCIfSubtarget<"isTargetNaCl()", + CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, V1]>>>, + // @LOCALMOD-END // f32 arguments are passed in single-precision floating pointer registers. CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index d07a595af3..cf350b5b97 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -184,6 +184,9 @@ namespace { } bool runOnMachineFunction(MachineFunction &F) { + if (SkipDelaySlotFiller) + return false; + bool Changed = false; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 4d76181f92..1d9ca6e1d1 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -350,6 +350,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + else + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + // @LOCALMOD-END + setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -383,6 +391,14 @@ MipsTargetLowering(MipsTargetMachine &TM) setTruncStoreAction(MVT::i64, MVT::i32, Custom); } + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + } + // @LOCALMOD-END + + setTargetDAGCombine(ISD::SDIVREM); setTargetDAGCombine(ISD::UDIVREM); setTargetDAGCombine(ISD::SELECT); @@ -1480,8 +1496,68 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, return getAddrLocal(Op, DAG, HasMips64); } +// @LOCALMOD-BEGIN + +// NaCl TLS setup / layout intrinsics. +// See: native_client/src/untrusted/nacl/tls_params.h +SDValue MipsTargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue MipsTargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + SDValue MipsTargetLowering:: -lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const +GetNaClThreadPointer(SelectionDAG &DAG, DebugLoc DL) const { + EVT PtrVT = getPointerTy(); + SDValue ThreadPointer; + if (llvm::TLSUseCall) { + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + + // We must check whether the __nacl_read_tp is defined in the module because + // local and global pic functions are called differently. If the function + // is local the address is calculated with %got and %lo relocations. + // Otherwise, the address is calculated with %call16 relocation. 
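Before the __nacl_read_tp lookup below, it may help to spell out the address computation that LowerGlobalTLSAddress (later in this hunk) assembles for NaCl: the thread pointer obtained from __nacl_read_tp (or read from T8) is first biased by 0x7000, and the variable's %tprel_hi/%tprel_lo halves are added on top. A rough standalone sketch with made-up numbers (the hi/lo split shown follows the usual MIPS convention and is an assumption about the relocation arithmetic, not taken from this patch):

    #include <cassert>
    #include <cstdint>

    // Sketch of the NaCl MIPS TLS address computation:
    //   address = (thread_pointer + 0x7000) + tprel(symbol)
    // where tprel(symbol) is rebuilt from a %hi/%lo pair (lui + addiu style).
    static uint32_t hi_part(uint32_t x) { return (x + 0x8000u) >> 16; }            // %tprel_hi-style
    static int32_t  lo_part(uint32_t x) { return static_cast<int16_t>(x & 0xFFFFu); } // %tprel_lo-style

    int main() {
      uint32_t thread_pointer = 0x20010000u;  // value __nacl_read_tp would return (made up)
      uint32_t tprel_offset   = 0x00019010u;  // offset of the TLS variable (made up)

      uint32_t rebuilt = (hi_part(tprel_offset) << 16) + lo_part(tprel_offset);
      assert(rebuilt == tprel_offset);        // hi/lo pair round-trips the offset

      uint32_t address = thread_pointer + 0x7000u + rebuilt;
      assert(address == thread_pointer + 0x7000u + tprel_offset);
      return 0;
    }
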
+ const Function *NaClReadTp = NULL; + const Module *M = DAG.getMachineFunction().getFunction()->getParent(); + for (Module::const_iterator I = M->getFunctionList().begin(), + E = M->getFunctionList().end(); I != E; ++I) { + if (I->getName() == "__nacl_read_tp") { + NaClReadTp = I; + break; + } + } + + SDValue TlsReadTp; + if (NaClReadTp == NULL) + TlsReadTp = DAG.getExternalSymbol("__nacl_read_tp", PtrVT); + else + TlsReadTp = DAG.getGlobalAddress(NaClReadTp, DL, PtrVT); + + ArgListTy Args; + TargetLowering::CallLoweringInfo CLI( + DAG.getEntryNode(), PtrTy, false, false, false, false, 0, + CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, TlsReadTp, Args, DAG, DL); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + ThreadPointer = CallResult.first; + } else { + ThreadPointer = DAG.getCopyFromReg(DAG.getEntryNode(), DL, + Mips::T8, PtrVT); + } + return ThreadPointer; +} +// @LOCALMOD-END + +SDValue MipsTargetLowering:: +LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // If the relocation model is PIC, use the General Dynamic TLS Model or // Local Dynamic TLS model, otherwise use the Initial Exec or @@ -1494,6 +1570,27 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const TLSModel::Model model = getTargetMachine().getTLSModel(GV); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + SDVTList VTs = DAG.getVTList(MVT::i32); + SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_HI); + SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_TPREL_LO); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, VTs, &TGAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, TGALo); + SDValue Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + + SDValue ThreadPointer = GetNaClThreadPointer(DAG, dl); + // tprel_hi and tprel_lo relocations expect that thread pointer is offset + // by 0x7000 from the start of the TLS data area. + SDValue TPOffset = DAG.getConstant(0x7000, MVT::i32); + SDValue ThreadPointer2 = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, + TPOffset); + return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer2, Offset); + } + // @LOCALMOD-END + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { // General Dynamic and Local Dynamic TLS Model. unsigned Flag = (model == TLSModel::LocalDynamic) ? MipsII::MO_TLSLDM diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 5587e8f581..6c1e7ae3ec 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -386,6 +386,12 @@ namespace llvm { void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC, SDValue Chain, DebugLoc DL, SelectionDAG &DAG) const; + // @LOCALMOD-BEGIN + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue GetNaClThreadPointer(SelectionDAG &DAG, DebugLoc DL) const; + // @LOCALMOD-END + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 6b23057c9c..af75392d11 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -319,23 +319,23 @@ let Predicates = [NotN64, NotMips64, HasStdEnc] in { } // Indexed loads and stores. 
-let Predicates = [HasFPIdx, HasStdEnc] in { +let Predicates = [HasFPIdx, HasStdEnc, IsNotNaCl/*@LOCALMOD*/] in { def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>; def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>; } -let Predicates = [HasMips32r2, NotMips64, HasStdEnc] in { +let Predicates = [HasMips32r2, NotMips64, HasStdEnc, IsNotNaCl/*@LOCALMOD*/] in { def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>; def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>; } -let Predicates = [HasMips64, NotN64, HasStdEnc], DecoderNamespace="Mips64" in { +let Predicates = [HasMips64, NotN64, HasStdEnc, IsNotNaCl/*@LOCALMOD*/], DecoderNamespace="Mips64" in { def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>; def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>; } // n64 -let Predicates = [IsN64, HasStdEnc], isCodeGenOnly=1 in { +let Predicates = [IsN64, HasStdEnc, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in { def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>; def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>, LWXC1_FM<1>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 86ec72982b..10d11dd835 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -181,6 +181,9 @@ def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; def NotDSP : Predicate<"!Subtarget.hasDSP()">; +def IsNaCl : Predicate<"Subtarget.isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; + class MipsPat<dag pattern, dag result> : Pat<pattern, result> { let Predicates = [HasStdEnc]; } @@ -797,6 +800,37 @@ class MFC3OP<dag outs, dag ins, string asmstr> : // Pseudo instructions //===----------------------------------------------------------------------===// +// @LOCALMOD-START + +// Older Macro based SFI Model +def SFI_GUARD_LOADSTORE : +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_load_store_preamble\t$dst, $src1, $src2">; + +def SFI_GUARD_INDIRECT_CALL : +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_call_preamble\t$dst, $src1, $src2">; + +def SFI_GUARD_INDIRECT_JMP : +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_indirect_jump_preamble\t$dst, $src1, $src2">; + +def SFI_GUARD_CALL : +MipsAsmPseudoInst<(outs), (ins), "sfi_call_preamble">; + +def SFI_GUARD_RETURN : +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_return_preamble\t$dst, $src1, $src2">; + +def SFI_NOP_IF_AT_BUNDLE_END : +MipsAsmPseudoInst<(outs), (ins), "sfi_nop_if_at_bundle_end">; + +def SFI_DATA_MASK : +MipsAsmPseudoInst<(outs CPURegs:$dst), (ins CPURegs:$src1, CPURegs:$src2), + "sfi_data_mask\t$dst, $src1, $src2">; + +// @LOCALMOD-END + // Return RA. let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>; diff --git a/lib/Target/Mips/MipsNaClHeaders.cpp b/lib/Target/Mips/MipsNaClHeaders.cpp new file mode 100644 index 0000000000..375c287d67 --- /dev/null +++ b/lib/Target/Mips/MipsNaClHeaders.cpp @@ -0,0 +1,128 @@ +//===-- MipsNaClHeaders.cpp - Print SFI headers to an Mips .s file --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the initial header string needed +// for the Native Client target in Mips assembly. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_ostream.h" +#include "MipsNaClRewritePass.h" +#include <string> + +using namespace llvm; + +void EmitMipsSFIHeaders(raw_ostream &O) { + O << " # ========================================\n"; + O << "# Branch: " << FlagSfiBranch << "\n"; + O << "# Stack: " << FlagSfiStack << "\n"; + O << "# Store: " << FlagSfiStore << "\n"; + O << "# Load: " << FlagSfiLoad << "\n"; + + O << " # ========================================\n"; + // NOTE: this macro does bundle alignment as follows + // if current bundle pos is X emit pX data items of value "val" + // NOTE: that pos will be one of: 0,4,8,12 + // + O << + "\t.macro sfi_long_based_on_pos p0 p1 p2 p3 val\n" + "\t.set pos, (. - XmagicX) % 16\n" + "\t.fill (((\\p3<<12)|(\\p2<<8)|(\\p1<<4)|\\p0)>>pos) & 15, 4, \\val\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nop_if_at_bundle_end\n" + "\tsfi_long_based_on_pos 0 0 0 1 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot3\n" + "\tsfi_long_based_on_pos 3 2 1 0 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot2\n" + "\tsfi_long_based_on_pos 2 1 0 3 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_nops_to_force_slot1\n" + "\tsfi_long_based_on_pos 1 0 3 2 0x00000000\n" + "\t.endm\n" + "\n\n"; + + O << " # ========================================\n"; + O << + "\t.macro sfi_data_mask reg1 reg2 maskreg\n" + "\tand \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_code_mask reg1 reg2 maskreg\n" + "\tand \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + O << " # ========================================\n"; + if (FlagSfiBranch) { + O << + "\t.macro sfi_call_preamble\n" + "\tsfi_nops_to_force_slot2\n" + "\t.endm\n" + "\n\n"; + + O << + "\t.macro sfi_return_preamble reg1 reg2 maskreg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + // This is used just before "jr" + O << + "\t.macro sfi_indirect_jump_preamble reg1 reg2 maskreg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + // This is used just before "jalr" + O << + "\t.macro sfi_indirect_call_preamble reg1 reg2 maskreg\n" + "\tsfi_nops_to_force_slot1\n" + "\tsfi_code_mask \\reg1, \\reg2, \\maskreg\n" + "\t.endm\n" + "\n\n"; + + } + + if (FlagSfiStore) { + O << " # ========================================\n"; + + O << + "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n" + "\tsfi_nop_if_at_bundle_end\n" + "\tsfi_data_mask \\reg1, \\reg2 , \\maskreg\n" + "\t.endm\n" + "\n\n"; + } else { + O << + "\t.macro sfi_load_store_preamble reg1 reg2 maskreg\n" + "\t.endm\n" + "\n\n"; + } + + O << " # ========================================\n"; + O << "\t.text\n"; +} diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp new file mode 100644 index 0000000000..6ab95109e9 --- /dev/null +++ b/lib/Target/Mips/MipsNaClRewritePass.cpp @@ -0,0 +1,340 @@ +//===-- MipsNaClRewritePass.cpp - Native Client Rewrite Pass -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Native Client Rewrite Pass +// This final pass inserts the sandboxing instructions needed to run inside +// the Native Client sandbox. Native Client requires certain software fault +// isolation (SFI) constructions to be put in place, to prevent escape from +// the sandbox. Native Client refuses to execute binaries without the correct +// SFI sequences. +// +// Potentially dangerous operations which are protected include: +// * Stores +// * Branches +// * Changes to SP +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-sfi" +#include "Mips.h" +#include "MipsInstrInfo.h" +#include "MipsNaClRewritePass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +unsigned Mips::IndirectBranchMaskReg = Mips::T6; +unsigned Mips::LoadStoreStackMaskReg = Mips::T7; + +namespace { + class MipsNaClRewritePass : public MachineFunctionPass { + public: + static char ID; + MipsNaClRewritePass() : MachineFunctionPass(ID) {} + + const MipsInstrInfo *TII; + const TargetRegisterInfo *TRI; + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "Mips Native Client Rewrite Pass"; + } + + private: + + bool SandboxLoadsInBlock(MachineBasicBlock &MBB); + bool SandboxStoresInBlock(MachineBasicBlock &MBB); + void SandboxLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx); + + bool SandboxBranchesInBlock(MachineBasicBlock &MBB); + bool SandboxStackChangesInBlock(MachineBasicBlock &MBB); + + void SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + void AlignAllJumpTargets(MachineFunction &MF); + }; + char MipsNaClRewritePass::ID = 0; +} + +static bool IsReturn(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::RET); +} + +static bool IsIndirectJump(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::JR); +} + +static bool IsIndirectCall(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::JALR) || (MI.getOpcode() == Mips::JALRPseudo); +} + +static bool IsDirectCall(const MachineInstr &MI) { + return ((MI.getOpcode() == Mips::JAL) || (MI.getOpcode() == Mips::BGEZAL) + || (MI.getOpcode() == Mips::BLTZAL)); +; +} + +static bool IsStackMask(const MachineInstr &MI) { + return (MI.getOpcode() == Mips::SFI_DATA_MASK); +} + +static bool NeedSandboxStackChange(const MachineInstr &MI, + const TargetRegisterInfo *TRI) { + if (IsDirectCall(MI) || IsIndirectCall(MI)) { + // We check this first because method modifiesRegister + // returns true for calls. + return false; + } + return (MI.modifiesRegister(Mips::SP, TRI) && !IsStackMask(MI)); +} + +void MipsNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_NOP_IF_AT_BUNDLE_END)); + + // Get to next instr (one + to get the original, and one more + to get past). 
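    // In other words: MBBI still points at the SP-changing instruction MI (the
    // SFI_NOP_IF_AT_BUNDLE_END above was inserted before it). The first
    // post-increment parks MBBINext on MI itself; the second leaves MBBINext2 on
    // the instruction after MI, so the SFI_DATA_MASK built below is inserted
    // immediately after MI. The resulting order, nop-guard / stack change /
    // data mask, is exactly what the EmitDataMask expansion in MipsMCNaCl.cpp
    // expects to find in its saved-instruction buffer.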
+ MachineBasicBlock::iterator MBBINext = (MBBI++); + (void) MBBINext; + MachineBasicBlock::iterator MBBINext2 = (MBBI++); + + BuildMI(MBB, MBBINext2, MI.getDebugLoc(), + TII->get(Mips::SFI_DATA_MASK), Mips::SP) + .addReg(Mips::SP) + .addReg(Mips::LoadStoreStackMaskReg); + return; +} + +bool MipsNaClRewritePass::SandboxStackChangesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ++MBBI) { + MachineInstr &MI = *MBBI; + if (NeedSandboxStackChange(MI, TRI)) { + SandboxStackChange(MBB, MBBI); + Modified = true; + } + } + return Modified; +} + +bool MipsNaClRewritePass::SandboxBranchesInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; ++MBBI) { + MachineInstr &MI = *MBBI; + + if (IsReturn(MI)) { + unsigned AddrReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_RETURN), AddrReg) + .addReg(AddrReg) + .addReg(Mips::IndirectBranchMaskReg); + Modified = true; + } else if (IsIndirectJump(MI)) { + unsigned AddrReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_INDIRECT_JMP), AddrReg) + .addReg(AddrReg) + .addReg(Mips::IndirectBranchMaskReg); + Modified = true; + } else if (IsDirectCall(MI)) { + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_CALL)); + Modified = true; + } else if (IsIndirectCall(MI)) { + unsigned AddrReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_INDIRECT_CALL), AddrReg) + .addReg(AddrReg) + .addReg(Mips::IndirectBranchMaskReg); + Modified = true; + } + } + + return Modified; +} + +/* + * Sandboxes a load or store instruction by inserting an appropriate mask + * operation before it. + */ +void MipsNaClRewritePass::SandboxLoadStore(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + MachineInstr &MI, + int AddrIdx) { + unsigned BaseReg = MI.getOperand(AddrIdx).getReg(); + + BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(Mips::SFI_GUARD_LOADSTORE), BaseReg) + .addReg(BaseReg) + .addReg(Mips::LoadStoreStackMaskReg); + return; +} + +bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 1 + case Mips::LB: + case Mips::LBu: + case Mips::LH: + case Mips::LHu: + case Mips::LW: + case Mips::LWC1: + case Mips::LDC1: + case Mips::LL: + case Mips::LWL: + case Mips::LWR: + *AddrIdx = 1; + break; + } + + switch (MI.getOperand(*AddrIdx).getReg()) { + default: break; + // The contents of SP and thread pointer register do not require masking. + case Mips::SP: + case Mips::T8: + return false; + } + + return true; +} + +bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx) { + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: return false; + + // Instructions with base address register in position 1 + case Mips::SB: + case Mips::SH: + case Mips::SW: + case Mips::SWC1: + case Mips::SDC1: + case Mips::SWL: + case Mips::SWR: + *AddrIdx = 1; + break; + + case Mips::SC: + *AddrIdx = 2; + break; + } + + switch (MI.getOperand(*AddrIdx).getReg()) { + default: break; + // The contents of SP and thread pointer register do not require masking. 
+ case Mips::SP: + case Mips::T8: + return false; + } + + return true; +} + +bool MipsNaClRewritePass::SandboxLoadsInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (IsDangerousLoad(MI, &AddrIdx)) { + SandboxLoadStore(MBB, MBBI, MI, AddrIdx); + Modified = true; + } + } + return Modified; +} + +bool MipsNaClRewritePass::SandboxStoresInBlock(MachineBasicBlock &MBB) { + bool Modified = false; + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E; + ++MBBI) { + MachineInstr &MI = *MBBI; + int AddrIdx; + + if (IsDangerousStore(MI, &AddrIdx)) { + SandboxLoadStore(MBB, MBBI, MI, AddrIdx); + Modified = true; + } + } + return Modified; +} + +// Make sure all jump targets are aligned +void MipsNaClRewritePass::AlignAllJumpTargets(MachineFunction &MF) { + // JUMP TABLE TARGETS + MachineJumpTableInfo *jt_info = MF.getJumpTableInfo(); + if (jt_info) { + const std::vector<MachineJumpTableEntry> &JT = jt_info->getJumpTables(); + for (unsigned i=0; i < JT.size(); ++i) { + std::vector<MachineBasicBlock*> MBBs = JT[i].MBBs; + + for (unsigned j=0; j < MBBs.size(); ++j) { + MBBs[j]->setAlignment(4); + } + } + } + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + I != E; ++I) { + MachineBasicBlock &MBB = *I; + if (MBB.hasAddressTaken()) + MBB.setAlignment(4); + } +} + +bool MipsNaClRewritePass::runOnMachineFunction(MachineFunction &MF) { + TII = static_cast<const MipsInstrInfo*>(MF.getTarget().getInstrInfo()); + TRI = MF.getTarget().getRegisterInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; + ++MFI) { + MachineBasicBlock &MBB = *MFI; + + if (FlagSfiLoad) + Modified |= SandboxLoadsInBlock(MBB); + if (FlagSfiStore) + Modified |= SandboxStoresInBlock(MBB); + if (FlagSfiBranch) + Modified |= SandboxBranchesInBlock(MBB); + if (FlagSfiStack) + Modified |= SandboxStackChangesInBlock(MBB); + } + + if (FlagSfiBranch) + AlignAllJumpTargets(MF); + + return Modified; +} + +/// createMipsNaClRewritePass - returns an instance of the NaClRewritePass. 
+FunctionPass *llvm::createMipsNaClRewritePass() { + return new MipsNaClRewritePass(); +} diff --git a/lib/Target/Mips/MipsNaClRewritePass.h b/lib/Target/Mips/MipsNaClRewritePass.h new file mode 100644 index 0000000000..4e729ec985 --- /dev/null +++ b/lib/Target/Mips/MipsNaClRewritePass.h @@ -0,0 +1,21 @@ +//===-- MipsNaClRewritePass.h - NaCl Sandboxing Pass ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_MIPSNACLREWRITEPASS_H +#define TARGET_MIPSNACLREWRITEPASS_H + +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + extern cl::opt<bool> FlagSfiLoad; + extern cl::opt<bool> FlagSfiStore; + extern cl::opt<bool> FlagSfiStack; + extern cl::opt<bool> FlagSfiBranch; +} + +#endif diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index dead07bacd..cd65635f01 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -116,6 +116,16 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I) Reserved.set(ReservedCPURegs[I]); + // @LOCALMOD-BEGIN: reserved for PNaCl use + if (Subtarget.isTargetNaCl()) { + static const uint16_t PnaclReservedCPURegs[] = { + Mips::T6, Mips::T7, Mips::T8 + }; + for (unsigned I = 0; I < array_lengthof(PnaclReservedCPURegs); ++I) + Reserved.set(PnaclReservedCPURegs[I]); + } + // @LOCALMOD-END + for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I) Reserved.set(ReservedCPU64Regs[I]); diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 14a2b27795..1d34b61bad 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -61,6 +61,9 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, InMips16Mode(false), InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), RM(_RM), OverrideMode(NoOverride), TM(_TM) + // @LOCALMOD-START + , TargetTriple(TT) + // @LOCALMOD-END { std::string CPUName = CPU; if (CPUName.empty()) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index f2f0e15887..864a3392af 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -118,12 +118,13 @@ protected: // Relocation Model Reloc::Model RM; + Triple TargetTriple; // @LOCALMOD + // We can override the determination of whether we are in mips16 mode // as from the command line enum {NoOverride, Mips16Override, NoMips16Override} OverrideMode; MipsTargetMachine *TM; - public: virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -192,6 +193,11 @@ public: bool os16() const { return Os16;}; + // @LOCALMOD-START + bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } + bool isNotTargetNaCl() const { return !TargetTriple.isOSNaCl(); } + // @LOCALMOD-END + // Grab MipsRegInfo object const MipsReginfo &getMReginfo() const { return MRI; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index ee28e2a122..a2fb63d74c 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -198,6 +198,14 @@ bool MipsPassConfig::addPreEmitPass() { Subtarget.allowMixed16_32()) addPass(createMipsConstantIslandPass(TM)); + + // @LOCALMOD-START + if 
(getMipsSubtarget().isTargetNaCl()) { + // This pass does all the heavy sfi lifting. + addPass(createMipsNaClRewritePass()); + } + // @LOCALMOD-END + return true; } diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 4c748c5b57..90afe9b298 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -52,6 +52,24 @@ void MipsTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC, SectionKind::getMetadata()); + + // @LOCALMOD-BEGIN + // Without this the linker defined symbols __fini_array_start and + // __fini_array_end do not have useful values. c.f.: + // http://code.google.com/p/nativeclient/issues/detail?id=805 + if (Subtarget.isTargetNaCl()) { + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + // @LOCALMOD-END } // A address must be loaded from a small section if its size is less than the @@ -81,6 +99,12 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (!Subtarget.useSmallSection()) return false; + // @LOCALMOD-BEGIN + // Do not use small section for NaCl. + if (Subtarget.isTargetNaCl()) + return false; + // @LOCALMOD-BEGIN + // Only global variables, not functions. const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); if (!GVA) diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index e7282519d5..3f43f95918 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -29,6 +29,7 @@ using namespace llvm; namespace llvm { bool HasDivModLibcall; bool AsmVerbosityDefault(false); + bool TLSUseCall; // @LOCALMOD } static cl::opt<bool> @@ -39,6 +40,20 @@ static cl::opt<bool> FunctionSections("ffunction-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +// @LOCALMOD-BEGIN +// Use a function call to get the thread pointer for TLS accesses, +// instead of using inline code. 
+static cl::opt<bool, true> +EnableTLSUseCall("mtls-use-call", + cl::desc("Use a function call to get the thread pointer for TLS accesses."), + cl::location(TLSUseCall), + cl::init(false)); + +static cl::opt<bool> + ForceTLSNonPIC("force-tls-non-pic", + cl::desc("Force TLS to use non-PIC models"), + cl::init(false)); +// @LOCALMOD-END //--------------------------------------------------------------------------- // TargetMachine Class @@ -137,7 +152,8 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { bool isHidden = Var->hasHiddenVisibility(); TLSModel::Model Model; - if (isPIC && !isPIE) { + if (isPIC && !isPIE && + !ForceTLSNonPIC) { // @LOCALMOD if (isLocal || isHidden) Model = TLSModel::LocalDynamic; else diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 7cb71f066c..bc94b6396e 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -25,6 +25,7 @@ set(sources X86JITInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp + X86NaClRewritePass.cpp X86PadShortFunction.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 1c240e52a3..8be0c5e6d7 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMX86Desc X86MCTargetDesc.cpp X86MCAsmInfo.cpp X86MCCodeEmitter.cpp + X86MCNaCl.cpp # LOCALMOD X86MachObjectWriter.cpp X86ELFObjectWriter.cpp X86WinCOFFObjectWriter.cpp diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 598ddee56d..015a39e3d6 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -9,6 +9,7 @@ #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86FixupKinds.h" +#include "MCTargetDesc/X86MCNaCl.h" // @LOCALMOD #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -98,12 +99,19 @@ public: assert(Fixup.getOffset() + Size <= DataSize && "Invalid fixup offset!"); + // @LOCALMOD-BEGIN + // This check breaks negative addends on x86-32. It makes x86-32 + // behaviour inconsistent with x86-64 and ARM. + // See: https://code.google.com/p/nativeclient/issues/detail?id=3548 +#if 0 // Check that uppper bits are either all zeros or all ones. // Specifically ignore overflow/underflow as long as the leakage is // limited to the lower bits. This is to remain compatible with // other assemblers. 
assert(isIntN(Size * 8 + 1, Value) && "Value does not fit in the Fixup field"); +#endif + // @LOCALMOD-END for (unsigned i = 0; i != Size; ++i) Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8)); @@ -346,6 +354,7 @@ public: const MCSectionELF &ES = static_cast<const MCSectionELF&>(Section); return ES.getFlags() & ELF::SHF_MERGE; } + }; class ELFX86_32AsmBackend : public ELFX86AsmBackend { @@ -368,6 +377,28 @@ public: } }; +// @LOCALMOD-BEGIN +class NaClX86_32AsmBackend : public ELFX86_32AsmBackend { +public: + NaClX86_32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) + : ELFX86_32AsmBackend(T, OSABI, CPU) {} + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + return CustomExpandInstNaClX86(Inst, Out); + } +}; + +class NaClX86_64AsmBackend : public ELFX86_64AsmBackend { +public: + NaClX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) + : ELFX86_64AsmBackend(T, OSABI, CPU) {} + + bool CustomExpandInst(const MCInst &Inst, MCStreamer &Out) const { + return CustomExpandInstNaClX86(Inst, Out); + } +}; +// @LOCALMOD-END + class WindowsX86AsmBackend : public X86AsmBackend { bool Is64Bit; @@ -459,6 +490,10 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, StringRef TT, String return new WindowsX86AsmBackend(T, false, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + // @LOCALMOD-BEGIN + if (TheTriple.isOSNaCl()) + return new NaClX86_32AsmBackend(T, OSABI, CPU); + // @LOCALMOD-END return new ELFX86_32AsmBackend(T, OSABI, CPU); } @@ -472,5 +507,9 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringRef TT, String return new WindowsX86AsmBackend(T, true, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + // @LOCALMOD-BEGIN + if (TheTriple.isOSNaCl()) + return new NaClX86_64AsmBackend(T, OSABI, CPU); + // @LOCALMOD-END return new ELFX86_64AsmBackend(T, OSABI, CPU); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index d8f727887f..91d2f15b6f 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -292,6 +292,8 @@ namespace X86II { /// manual, this operand is described as pntr16:32 and pntr16:16 RawFrmImm16 = 44, + CustomFrm = 62, // @LOCALMOD + FormMask = 63, //===------------------------------------------------------------------===// @@ -563,6 +565,7 @@ namespace X86II { case X86II::MRMSrcReg: case X86II::RawFrmImm8: case X86II::RawFrmImm16: + case X86II::CustomFrm: // @LOCALMOD return -1; case X86II::MRMDestMem: return 0; diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 7815ae98c9..2039b7d210 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -1,3 +1,4 @@ + //===-- X86MCAsmInfo.cpp - X86 asm properties -----------------------------===// // // The LLVM Compiler Infrastructure @@ -79,10 +80,15 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { bool is64Bit = T.getArch() == Triple::x86_64; bool isX32 = T.getEnvironment() == Triple::GNUX32; + // @LOCALMOD-BEGIN(eliben) + // Until Nacl implies x32, we add &&!isNaCl in the PointerSize condition + bool isNaCl = T.isOSNaCl(); + // For ELF, x86-64 pointer size depends on the ABI. // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64 // with the x32 ABI, pointer size remains the default 4. - PointerSize = (is64Bit && !isX32) ? 8 : 4; + PointerSize = (is64Bit && !isX32 && !isNaCl) ? 
8 : 4; + // @LOCALMOD-END // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI. CalleeSaveStackSlotSize = is64Bit ? 8 : 4; diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 016af71501..e35b36bc62 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -873,7 +873,6 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, const MCInst &MI, const MCInstrDesc &Desc, raw_ostream &OS) const { - // Emit the lock opcode prefix as needed. if (TSFlags & X86II::LOCK) EmitByte(0xF0, CurByte, OS); @@ -1029,6 +1028,10 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, llvm_unreachable("Unknown FormMask value in X86MCCodeEmitter!"); case X86II::Pseudo: llvm_unreachable("Pseudo instruction shouldn't be emitted"); + // @LOCALMOD-BEGIN + case X86II::CustomFrm: + assert(0 && "CustomFrm instruction shouldn't be emitted"); + // @LOCALMOD-END case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp new file mode 100644 index 0000000000..9acaf68c82 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp @@ -0,0 +1,695 @@ +//=== X86MCNaCl.cpp - Expansion of NaCl pseudo-instructions --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-sandboxing" + +#include "MCTargetDesc/X86MCTargetDesc.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "MCTargetDesc/X86MCNaCl.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +// This option makes it possible to overwrite the x86 jmp mask immediate. +// Setting it to -1 will effectively turn masking into a nop which will +// help with linking this code with non-sandboxed libs (at least for x86-32). +cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", cl::init(-32)); + +cl::opt<bool> FlagUseZeroBasedSandbox("sfi-zero-based-sandbox", + cl::desc("Use a zero-based sandbox model" + " for the NaCl SFI."), + cl::init(false)); + +static unsigned PrefixSaved = 0; +static bool PrefixPass = false; + +// See the notes below where these functions are defined. +namespace { +unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High=false); +unsigned DemoteRegTo32_(unsigned RegIn); +} // namespace + +static void EmitDirectCall(const MCOperand &Op, bool Is64Bit, + MCStreamer &Out) { + Out.EmitBundleLock(true); + + MCInst CALLInst; + CALLInst.setOpcode(Is64Bit ? 
X86::CALL64pcrel32 : X86::CALLpcrel32); + CALLInst.addOperand(Op); + Out.EmitInstruction(CALLInst); + Out.EmitBundleUnlock(); +} + +static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall, + MCStreamer &Out) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const int JmpMask = FlagSfiX86JmpMask; + const unsigned Reg32 = Op.getReg(); + const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); + + Out.EmitBundleLock(IsCall); + + MCInst ANDInst; + ANDInst.setOpcode(X86::AND32ri8); + ANDInst.addOperand(MCOperand::CreateReg(Reg32)); + ANDInst.addOperand(MCOperand::CreateReg(Reg32)); + ANDInst.addOperand(MCOperand::CreateImm(JmpMask)); + Out.EmitInstruction(ANDInst); + + if (Is64Bit && !UseZeroBasedSandbox) { + MCInst InstADD; + InstADD.setOpcode(X86::ADD64rr); + InstADD.addOperand(MCOperand::CreateReg(Reg64)); + InstADD.addOperand(MCOperand::CreateReg(Reg64)); + InstADD.addOperand(MCOperand::CreateReg(X86::R15)); + Out.EmitInstruction(InstADD); + } + + if (IsCall) { + MCInst CALLInst; + CALLInst.setOpcode(Is64Bit ? X86::CALL64r : X86::CALL32r); + CALLInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32)); + Out.EmitInstruction(CALLInst); + } else { + MCInst JMPInst; + JMPInst.setOpcode(Is64Bit ? X86::JMP64r : X86::JMP32r); + JMPInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32)); + Out.EmitInstruction(JMPInst); + } + Out.EmitBundleUnlock(); +} + +static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) { + MCInst POPInst; + POPInst.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r); + POPInst.addOperand(MCOperand::CreateReg(Is64Bit ? X86::RCX : X86::ECX)); + Out.EmitInstruction(POPInst); + + if (AmtOp) { + assert(!Is64Bit); + MCInst ADDInst; + unsigned ADDReg = X86::ESP; + ADDInst.setOpcode(X86::ADD32ri); + ADDInst.addOperand(MCOperand::CreateReg(ADDReg)); + ADDInst.addOperand(MCOperand::CreateReg(ADDReg)); + ADDInst.addOperand(*AmtOp); + Out.EmitInstruction(ADDInst); + } + + MCInst JMPInst; + JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r); + JMPInst.addOperand(MCOperand::CreateReg(X86::ECX)); + Out.EmitInstruction(JMPInst); +} + +static void EmitTrap(bool Is64Bit, MCStreamer &Out) { + // Rewrite to: + // X86-32: mov $0, 0 + // X86-64: mov $0, (%r15) + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + unsigned BaseReg = Is64Bit && !UseZeroBasedSandbox ? X86::R15 : 0; + + MCInst Tmp; + Tmp.setOpcode(X86::MOV32mi); + Tmp.addOperand(MCOperand::CreateReg(BaseReg)); // BaseReg + Tmp.addOperand(MCOperand::CreateImm(1)); // Scale + Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg + Tmp.addOperand(MCOperand::CreateImm(0)); // Offset + Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg + Tmp.addOperand(MCOperand::CreateImm(0)); // Value + + Out.EmitInstruction(Tmp); +} + +// Fix a register after being truncated to 32-bits. 
+static void EmitRegFix(unsigned Reg64, MCStreamer &Out) { + // lea (%rsp, %r15, 1), %rsp + // We do not need to add the R15 base for the zero-based sandbox model + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + if (!UseZeroBasedSandbox) { + MCInst Tmp; + Tmp.setOpcode(X86::LEA64r); + Tmp.addOperand(MCOperand::CreateReg(Reg64)); // DestReg + Tmp.addOperand(MCOperand::CreateReg(Reg64)); // BaseReg + Tmp.addOperand(MCOperand::CreateImm(1)); // Scale + Tmp.addOperand(MCOperand::CreateReg(X86::R15)); // IndexReg + Tmp.addOperand(MCOperand::CreateImm(0)); // Offset + Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg + Out.EmitInstruction(Tmp); + } +} + +static void EmitSPArith(unsigned Opc, const MCOperand &ImmOp, + MCStreamer &Out) { + Out.EmitBundleLock(false); + + MCInst Tmp; + Tmp.setOpcode(Opc); + Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); + Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); + Tmp.addOperand(ImmOp); + Out.EmitInstruction(Tmp); + + EmitRegFix(X86::RSP, Out); + Out.EmitBundleUnlock(); +} + +static void EmitSPAdj(const MCOperand &ImmOp, MCStreamer &Out) { + Out.EmitBundleLock(false); + + MCInst Tmp; + Tmp.setOpcode(X86::LEA64_32r); + Tmp.addOperand(MCOperand::CreateReg(X86::RSP)); // DestReg + Tmp.addOperand(MCOperand::CreateReg(X86::RBP)); // BaseReg + Tmp.addOperand(MCOperand::CreateImm(1)); // Scale + Tmp.addOperand(MCOperand::CreateReg(0)); // IndexReg + Tmp.addOperand(ImmOp); // Offset + Tmp.addOperand(MCOperand::CreateReg(0)); // SegmentReg + Out.EmitInstruction(Tmp); + + EmitRegFix(X86::RSP, Out); + Out.EmitBundleUnlock(); +} + +static void EmitPrefix(unsigned Opc, MCStreamer &Out) { + assert(PrefixSaved == 0); + assert(PrefixPass == false); + + MCInst PrefixInst; + PrefixInst.setOpcode(Opc); + PrefixPass = true; + Out.EmitInstruction(PrefixInst); + + assert(PrefixSaved == 0); + assert(PrefixPass == false); +} + +static void EmitMoveRegReg(bool Is64Bit, unsigned ToReg, + unsigned FromReg, MCStreamer &Out) { + MCInst Move; + Move.setOpcode(Is64Bit ? X86::MOV64rr : X86::MOV32rr); + Move.addOperand(MCOperand::CreateReg(ToReg)); + Move.addOperand(MCOperand::CreateReg(FromReg)); + Out.EmitInstruction(Move); +} + +static void EmitRegTruncate(unsigned Reg64, MCStreamer &Out) { + unsigned Reg32 = getX86SubSuperRegister_(Reg64, MVT::i32); + EmitMoveRegReg(false, Reg32, Reg32, Out); +} + +static void HandleMemoryRefTruncation(MCInst *Inst, unsigned IndexOpPosition, + MCStreamer &Out) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + unsigned IndexReg = Inst->getOperand(IndexOpPosition).getReg(); + if (UseZeroBasedSandbox) { + // With the zero-based sandbox, we use a 32-bit register on the index + Inst->getOperand(IndexOpPosition).setReg(DemoteRegTo32_(IndexReg)); + } else { + EmitRegTruncate(IndexReg, Out); + } +} + +static void ShortenMemoryRef(MCInst *Inst, unsigned IndexOpPosition) { + unsigned ImmOpPosition = IndexOpPosition - 1; + unsigned BaseOpPosition = IndexOpPosition - 2; + unsigned IndexReg = Inst->getOperand(IndexOpPosition).getReg(); + // For the SIB byte, if the scale is 1 and the base is 0, then + // an equivalent setup moves index to base, and index to 0. The + // equivalent setup is optimized to remove the SIB byte in + // X86MCCodeEmitter.cpp. 
+ if (Inst->getOperand(ImmOpPosition).getImm() == 1 && + Inst->getOperand(BaseOpPosition).getReg() == 0) { + Inst->getOperand(BaseOpPosition).setReg(IndexReg); + Inst->getOperand(IndexOpPosition).setReg(0); + } +} + +static void EmitLoad(bool Is64Bit, + unsigned DestReg, + unsigned BaseReg, + unsigned Scale, + unsigned IndexReg, + unsigned Offset, + unsigned SegmentReg, + MCStreamer &Out) { + // Load DestReg from address BaseReg + Scale * IndexReg + Offset + MCInst Load; + Load.setOpcode(Is64Bit ? X86::MOV64rm : X86::MOV32rm); + Load.addOperand(MCOperand::CreateReg(DestReg)); + Load.addOperand(MCOperand::CreateReg(BaseReg)); + Load.addOperand(MCOperand::CreateImm(Scale)); + Load.addOperand(MCOperand::CreateReg(IndexReg)); + Load.addOperand(MCOperand::CreateImm(Offset)); + Load.addOperand(MCOperand::CreateReg(SegmentReg)); + Out.EmitInstruction(Load); +} + +static bool SandboxMemoryRef(MCInst *Inst, + unsigned *IndexOpPosition) { + for (unsigned i = 0, last = Inst->getNumOperands(); i < last; i++) { + if (!Inst->getOperand(i).isReg() || + Inst->getOperand(i).getReg() != X86::PSEUDO_NACL_SEG) { + continue; + } + // Return the index register that will need to be truncated. + // The order of operands on a memory reference is always: + // (BaseReg, ScaleImm, IndexReg, DisplacementImm, SegmentReg), + // So if we found a match for a segment register value, we know that + // the index register is exactly two operands prior. + *IndexOpPosition = i - 2; + + // Remove the PSEUDO_NACL_SEG annotation. + Inst->getOperand(i).setReg(0); + return true; + } + return false; +} + +static void EmitTLSAddr32(const MCInst &Inst, MCStreamer &Out) { + Out.EmitBundleLock(true); + + MCInst LeaInst; + LeaInst.setOpcode(X86::LEA32r); + LeaInst.addOperand(MCOperand::CreateReg(X86::EAX)); // DestReg + LeaInst.addOperand(Inst.getOperand(0)); // BaseReg + LeaInst.addOperand(Inst.getOperand(1)); // Scale + LeaInst.addOperand(Inst.getOperand(2)); // IndexReg + LeaInst.addOperand(Inst.getOperand(3)); // Offset + LeaInst.addOperand(Inst.getOperand(4)); // SegmentReg + Out.EmitInstruction(LeaInst); + + MCInst CALLInst; + CALLInst.setOpcode(X86::CALLpcrel32); + MCContext &context = Out.getContext(); + const MCSymbolRefExpr *expr = + MCSymbolRefExpr::Create( + context.GetOrCreateSymbol(StringRef("___tls_get_addr")), + MCSymbolRefExpr::VK_PLT, context); + CALLInst.addOperand(MCOperand::CreateExpr(expr)); + Out.EmitInstruction(CALLInst); + Out.EmitBundleUnlock(); +} + + +static void EmitREST(const MCInst &Inst, unsigned Reg32, + bool IsMem, MCStreamer &Out) { + unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); + Out.EmitBundleLock(false); + if (!IsMem) { + EmitMoveRegReg(false, Reg32, Inst.getOperand(0).getReg(), Out); + } else { + unsigned IndexOpPosition; + MCInst SandboxedInst = Inst; + if (SandboxMemoryRef(&SandboxedInst, &IndexOpPosition)) { + HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out); + ShortenMemoryRef(&SandboxedInst, IndexOpPosition); + } + EmitLoad(false, + Reg32, + SandboxedInst.getOperand(0).getReg(), // BaseReg + SandboxedInst.getOperand(1).getImm(), // Scale + SandboxedInst.getOperand(2).getReg(), // IndexReg + SandboxedInst.getOperand(3).getImm(), // Offset + SandboxedInst.getOperand(4).getReg(), // SegmentReg + Out); + } + + EmitRegFix(Reg64, Out); + Out.EmitBundleUnlock(); +} + + +namespace llvm { +// CustomExpandInstNaClX86 - +// If Inst is a NaCl pseudo instruction, emits the substitute +// expansion to the MCStreamer and returns true. +// Otherwise, returns false. 
+// +// NOTE: Each time this function calls Out.EmitInstruction(), it will be +// called again recursively to rewrite the new instruction being emitted. +// Care must be taken to ensure that this does not result in an infinite +// loop. Also, global state must be managed carefully so that it is +// consistent during recursive calls. +// +// We need global state to keep track of the explicit prefix (PREFIX_*) +// instructions. Unfortunately, the assembly parser prefers to generate +// these instead of combined instructions. At this time, having only +// one explicit prefix is supported. +bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + // If we are emitting to .s, just emit all pseudo-instructions directly. + if (Out.hasRawTextSupport()) { + return false; + } + unsigned Opc = Inst.getOpcode(); + DEBUG(dbgs() << "CustomExpandInstNaClX86("; Inst.dump(); dbgs() << ")\n"); + switch (Opc) { + case X86::LOCK_PREFIX: + case X86::REP_PREFIX: + case X86::REPNE_PREFIX: + case X86::REX64_PREFIX: + // Ugly hack because LLVM AsmParser is not smart enough to combine + // prefixes back into the instruction they modify. + if (PrefixPass) { + PrefixPass = false; + PrefixSaved = 0; + return false; + } + assert(PrefixSaved == 0); + PrefixSaved = Opc; + return true; + case X86::NACL_TRAP32: + assert(PrefixSaved == 0); + EmitTrap(false, Out); + return true; + case X86::NACL_TRAP64: + assert(PrefixSaved == 0); + EmitTrap(true, Out); + return true; + case X86::NACL_CALL32d: + assert(PrefixSaved == 0); + EmitDirectCall(Inst.getOperand(0), false, Out); + return true; + case X86::NACL_CALL64d: + assert(PrefixSaved == 0); + EmitDirectCall(Inst.getOperand(0), true, Out); + return true; + case X86::NACL_CALL32r: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), false, true, Out); + return true; + case X86::NACL_CALL64r: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), true, true, Out); + return true; + case X86::NACL_JMP32r: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), false, false, Out); + return true; + case X86::NACL_TLS_addr32: + assert(PrefixSaved == 0); + EmitTLSAddr32(Inst, Out); + return true; + case X86::NACL_JMP64r: + case X86::NACL_JMP64z: + assert(PrefixSaved == 0); + EmitIndirectBranch(Inst.getOperand(0), true, false, Out); + return true; + case X86::NACL_RET32: + assert(PrefixSaved == 0); + EmitRet(NULL, false, Out); + return true; + case X86::NACL_RET64: + assert(PrefixSaved == 0); + EmitRet(NULL, true, Out); + return true; + case X86::NACL_RETI32: + assert(PrefixSaved == 0); + EmitRet(&Inst.getOperand(0), false, Out); + return true; + case X86::NACL_ASPi8: + assert(PrefixSaved == 0); + EmitSPArith(X86::ADD32ri8, Inst.getOperand(0), Out); + return true; + case X86::NACL_ASPi32: + assert(PrefixSaved == 0); + EmitSPArith(X86::ADD32ri, Inst.getOperand(0), Out); + return true; + case X86::NACL_SSPi8: + assert(PrefixSaved == 0); + EmitSPArith(X86::SUB32ri8, Inst.getOperand(0), Out); + return true; + case X86::NACL_SSPi32: + assert(PrefixSaved == 0); + EmitSPArith(X86::SUB32ri, Inst.getOperand(0), Out); + return true; + case X86::NACL_ANDSPi32: + assert(PrefixSaved == 0); + EmitSPArith(X86::AND32ri, Inst.getOperand(0), Out); + return true; + case X86::NACL_SPADJi32: + assert(PrefixSaved == 0); + EmitSPAdj(Inst.getOperand(0), Out); + return true; + case X86::NACL_RESTBPm: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::EBP, true, Out); + return true; + case 
X86::NACL_RESTBPr: + case X86::NACL_RESTBPrz: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::EBP, false, Out); + return true; + case X86::NACL_RESTSPm: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::ESP, true, Out); + return true; + case X86::NACL_RESTSPr: + case X86::NACL_RESTSPrz: + assert(PrefixSaved == 0); + EmitREST(Inst, X86::ESP, false, Out); + return true; + } + + unsigned IndexOpPosition; + MCInst SandboxedInst = Inst; + if (SandboxMemoryRef(&SandboxedInst, &IndexOpPosition)) { + unsigned PrefixLocal = PrefixSaved; + PrefixSaved = 0; + + if (PrefixLocal || !UseZeroBasedSandbox) + Out.EmitBundleLock(false); + + HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out); + ShortenMemoryRef(&SandboxedInst, IndexOpPosition); + + if (PrefixLocal) + EmitPrefix(PrefixLocal, Out); + Out.EmitInstruction(SandboxedInst); + + if (PrefixLocal || !UseZeroBasedSandbox) + Out.EmitBundleUnlock(); + return true; + } + + if (PrefixSaved) { + unsigned PrefixLocal = PrefixSaved; + PrefixSaved = 0; + EmitPrefix(PrefixLocal, Out); + } + return false; +} + +} // namespace llvm + + + + +// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +// +// This is an exact copy of getX86SubSuperRegister from X86RegisterInfo.h +// We cannot use the original because it is part of libLLVMX86CodeGen, +// which cannot be a dependency of this module (libLLVMX86Desc). +// +// However, in all likelyhood, the real getX86SubSuperRegister will +// eventually be moved to MCTargetDesc, and then this copy can be +// removed. + +namespace { +unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High) { + switch (VT.getSimpleVT().SimpleTy) { + default: return Reg; + case MVT::i8: + if (High) { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AH; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DH; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CH; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BH; + } + } else { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AL; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DL; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CL; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BL; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SIL; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DIL; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BPL; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SPL; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8B; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9B; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10B; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11B; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12B; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13B; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14B; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15B; + } + } + case 
MVT::i16: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8W; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9W; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10W; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11W; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12W; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13W; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14W; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15W; + } + case MVT::i32: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::EAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::EDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::ECX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::EBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::ESI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::EDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::EBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::ESP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8D; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9D; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10D; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11D; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12D; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13D; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14D; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15D; + } + case MVT::i64: + switch (Reg) { + default: return Reg; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::RAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::RDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::RCX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::RBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::RSI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::RDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::RBP; + case X86::SPL: case X86::SP: case X86::ESP: case 
X86::RSP: + return X86::RSP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15; + } + } + + return Reg; +} + +// This is a copy of DemoteRegTo32 from X86NaClRewritePass.cpp. +// We cannot use the original because it uses part of libLLVMX86CodeGen, +// which cannot be a dependency of this module (libLLVMX86Desc). +// Note that this function calls getX86SubSuperRegister_, which is +// also a copied function for the same reason. + +unsigned DemoteRegTo32_(unsigned RegIn) { + if (RegIn == 0) + return 0; + unsigned RegOut = getX86SubSuperRegister_(RegIn, MVT::i32, false); + assert(RegOut != 0); + return RegOut; +} +} //namespace +// @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.h b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h new file mode 100644 index 0000000000..01b400d4d9 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.h @@ -0,0 +1,19 @@ +//===-- X86MCNaCl.h - Prototype for CustomExpandInstNaClX86 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef X86MCNACL_H +#define X86MCNACL_H + +namespace llvm { + class MCInst; + class MCStreamer; + bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out); +} + +#endif diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 947002fd14..a8eb0a256d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -47,6 +47,10 @@ FunctionPass *createCleanupLocalDynamicTLSPass(); /// FunctionPass *createX86FloatingPointStackifierPass(); +// @LOCALMOD-BEGIN - Creates a pass to make instructions follow NaCl SFI rules. +FunctionPass* createX86NaClRewritePass(); +// @LOCALMOD-END + /// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions /// before each call to avoid transition penalty between functions encoded with /// AVX and SSE. 
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 6b228b0b03..c4e8cfe0a9 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -31,6 +31,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCNaCl.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -71,6 +72,35 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { return false; } +// @LOCALMOD-BEGIN +bool X86AsmPrinter::UseReadOnlyJumpTables() const { + return Subtarget->isTargetNaCl(); +} + +unsigned X86AsmPrinter::GetTargetBasicBlockAlign() const { + if (Subtarget->isTargetNaCl()) + return 5; + return 0; +} + +unsigned X86AsmPrinter::GetTargetLabelAlign(const MachineInstr *MI) const { + if (Subtarget->isTargetNaCl()) { + switch (MI->getOpcode()) { + default: return 0; + // These labels may indicate an indirect entry point that is + // externally reachable and hence must be bundle aligned. + // Note: these labels appear to be always at basic block beginnings + // so it may be possible to simply set the MBB alignment. + // However, it is unclear whether this always holds. + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + return 5; + } + } + return 0; +} +// @LOCALMOD-END + /// printSymbolOperand - Print a raw symbol reference operand. This handles /// jump tables, constant pools, global address and external symbols, all of /// which print to a label with various suffixes for relocation types etc. @@ -518,6 +548,11 @@ bool X86AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { if (Subtarget->isTargetEnvMacho()) OutStreamer.SwitchSection(getObjFileLowering().getTextSection()); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) + initializeNaClMCStreamer(OutStreamer, OutContext, + Subtarget->getTargetTriple()); + // @LOCALMOD-END } diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index bc7496bad1..1870069229 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -42,6 +42,12 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { virtual void EmitInstruction(const MachineInstr *MI) LLVM_OVERRIDE; + virtual bool UseReadOnlyJumpTables() const; // @LOCALMOD + + virtual unsigned GetTargetBasicBlockAlign() const; // @LOCLAMOD + + virtual unsigned GetTargetLabelAlign(const MachineInstr *MI) const;//@LOCALMOD + void printSymbolOperand(const MachineOperand &MO, raw_ostream &O); // These methods are used by the tablegen'erated instruction printer. 
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 9eafbd55a5..fa29bb2a04 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -535,3 +535,9 @@ def CSR_64_Intel_OCL_BI : CalleeSavedRegs<(add CSR_64, //Standard C + YMM 8-15 def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64, (sequence "YMM%u", 8, 15))>; + +// @LOCALMOD-BEGIN +// NaCl x86-64 (R15 cannot be modified): +def CSR_NaCl64 : CalleeSavedRegs<(add RBX, R12, R13, R14, RBP)>; +def CSR_NaCl64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_NaCl64)>; +// @LOCALMOD-END diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index cf44bd033b..057db80e4d 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -36,8 +36,14 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Statistic.h" // @LOCALMOD using namespace llvm; +// @LOCALMOD-BEGIN +#define DEBUG_TYPE "isel" +STATISTIC(NumFastIselNaClFailures, "Number of instructions fast isel failed on for NaCl illegality"); +// @LOCALMOD-END + namespace { class X86FastISel : public FastISel { @@ -339,6 +345,16 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, /// X86SelectAddress - Attempt to fill in an address from the given value. /// +/// @LOCALMOD-BEGIN +/// All "return v;" statements must be converted to +/// "return (v) && isLegalAddressingModeForNaCl(Subtarget, AM);" +/// except that "return false;" can of course be left unchanged. +/// +/// Since X86SelectAddress() recursively builds up the AM result +/// object, there is a risk that an intermediate result could be +/// rejected in a situation where the final result was in fact legal, +/// though it is hard to imagine this happening. +/// @LOCALMOD-END bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { const User *U = NULL; unsigned Opcode = Instruction::UserOp1; @@ -388,7 +404,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (SI != FuncInfo.StaticAllocaMap.end()) { AM.BaseType = X86AddressMode::FrameIndexBase; AM.Base.FrameIndex = SI->second; - return true; + return isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD } break; } @@ -471,7 +487,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { AM.Scale = Scale; AM.Disp = (uint32_t)Disp; if (X86SelectAddress(U->getOperand(0), AM)) - return true; + return isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD // If we couldn't merge the gep value into this addr mode, revert back to // our address and just match the value instead of completely failing. @@ -529,7 +545,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { AM.Base.Reg = X86::RIP; } AM.GVOpFlags = GVFlags; - return true; + return isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD } // Ok, we need to do a load from a stub. 
If we've already loaded from @@ -556,6 +572,14 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (Subtarget->isPICStyleRIPRel()) StubAM.Base.Reg = X86::RIP; + // @LOCALMOD-BEGIN + } else if (Subtarget->isTargetNaCl64()) { + Opc = X86::MOV32rm; + RC = &X86::GR32RegClass; + + if (Subtarget->isPICStyleRIPRel()) + StubAM.Base.Reg = X86::RIP; + // @LOCALMOD-END } else { Opc = X86::MOV32rm; RC = &X86::GR32RegClass; @@ -577,20 +601,38 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // and Index values may already be set here. AM.Base.Reg = LoadReg; AM.GV = 0; - return true; + return isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD } } // If all else fails, try to materialize the value in a register. if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) { + // We are about use a register in an addressing mode. However, x86-64 + // NaCl does not allow arbitrary r+r addressing. One of the regs must + // be %r15 (inserted by the NaClRewritePass). Check that we will only + // end up with one reg defined after this. + if ((AM.Base.Reg == 0) && (AM.IndexReg == 0)) { + // Put into index register so that the NaCl rewrite pass will + // convert this to a 64-bit address. + AM.IndexReg = getRegForValue(V); + return AM.IndexReg != 0 + && isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD + } + return false; + } + // @LOCALMOD-END if (AM.Base.Reg == 0) { AM.Base.Reg = getRegForValue(V); - return AM.Base.Reg != 0; + return AM.Base.Reg != 0 + && isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD } if (AM.IndexReg == 0) { assert(AM.Scale == 1 && "Scale with no index!"); AM.IndexReg = getRegForValue(V); - return AM.IndexReg != 0; + return AM.IndexReg != 0 + && isLegalAddressingModeForNaCl(Subtarget, AM); // @LOCALMOD } } @@ -819,10 +861,15 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); - unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + // @LOCALMOD-BEGIN -- Ensure that the register classes match. + // At this point, SRetReturnReg is EDI, because PointerTy() for NaCl + // is i32. We then copy to EAX instead of RAX. Alternatively, we could + // have zero-extended EDI to RDI then copy to RAX, but this has a smaller + // encoding (2 bytes vs 3 bytes). + unsigned CopyTo = Subtarget->has64BitPointers() ? X86::RAX : X86::EAX; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - RetReg).addReg(Reg); - RetRegs.push_back(RetReg); + CopyTo).addReg(Reg); + // @LOCALMOD-END } // Now emit the RET. @@ -1496,6 +1543,7 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, else if (Len >= 2) VT = MVT::i16; else { + assert(Len == 1); VT = MVT::i8; } @@ -2032,10 +2080,21 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (CalleeOp) { // Register-indirect call. 
unsigned CallOpc; - if (Subtarget->is64Bit()) - CallOpc = X86::CALL64r; - else - CallOpc = X86::CALL32r; + // @LOCALMOD-BEGIN + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL64r; + } else { + CallOpc = X86::CALL64r; + } + } else { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL32r; + } else { + CallOpc = X86::CALL32r; + } + } + // @LOCALMOD-END MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addReg(CalleeOp); @@ -2043,10 +2102,21 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { // Direct call. assert(GV && "Not a direct call"); unsigned CallOpc; - if (Subtarget->is64Bit()) - CallOpc = X86::CALL64pcrel32; - else - CallOpc = X86::CALLpcrel32; + // @LOCALMOD-BEGIN + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALL64pcrel32; + } else { + CallOpc = X86::CALL64pcrel32; + } + } else { + if (Subtarget->isTargetNaCl()) { + CallOpc = X86::NACL_CG_CALLpcrel32; + } else { + CallOpc = X86::CALLpcrel32; + } + } + // @LOCALMOD-END // See if we need any target-specific flags on the GV operand. unsigned char OpFlags = 0; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 42b4e73509..b024817891 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -114,6 +114,8 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, case X86::TCRETURNmi: case X86::TCRETURNdi64: case X86::TCRETURNri64: + case X86::NACL_CG_TCRETURNdi64: // @LOCALMOD + case X86::NACL_CG_TCRETURNri64: // @LOCALMOD case X86::TCRETURNmi64: case X86::EH_RETURN: case X86::EH_RETURN64: { @@ -1013,6 +1015,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, case X86::TCRETURNdi64: case X86::TCRETURNri64: case X86::TCRETURNmi64: + case X86::NACL_CG_TCRETURNdi64: // @LOCALMOD + case X86::NACL_CG_TCRETURNri64: // @LOCALMOD case X86::EH_RETURN: case X86::EH_RETURN64: break; // These are ok @@ -1106,6 +1110,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::NACL_CG_TCRETURNri64 || // @LOCALMOD + RetOpcode == X86::NACL_CG_TCRETURNdi64 || // @LOCALMOD RetOpcode == X86::TCRETURNmi64) { bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64; // Tail call return: adjust the stack pointer and jump to callee. @@ -1132,10 +1138,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } // Jump to label or value in register. - if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) { + if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64 || + RetOpcode == X86::NACL_CG_TCRETURNdi64) { // @LOCALMOD + // @LOCALMOD-BEGIN + unsigned TailJmpOpc; + switch (RetOpcode) { + case X86::TCRETURNdi : TailJmpOpc = X86::TAILJMPd; break; + case X86::TCRETURNdi64: TailJmpOpc = X86::TAILJMPd64; break; + case X86::NACL_CG_TCRETURNdi64: + TailJmpOpc = X86::NACL_CG_TAILJMPd64; + break; + default: llvm_unreachable("Unexpected return opcode"); + } + // @LOCALMOD-END MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi) - ? 
X86::TAILJMPd : X86::TAILJMPd64)); + BuildMI(MBB, MBBI, DL, TII.get(TailJmpOpc)); // @LOCALMOD + if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), JumpTarget.getTargetFlags()); @@ -1153,6 +1171,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } else if (RetOpcode == X86::TCRETURNri64) { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). addReg(JumpTarget.getReg(), RegState::Kill); +// @LOCALMOD-BEGIN + } else if (RetOpcode == X86::NACL_CG_TCRETURNri64) { + BuildMI(MBB, MBBI, DL, TII.get(X86::NACL_CG_TAILJMPr64)). + addReg(JumpTarget.getReg(), RegState::Kill); +// @LOCALMOD-END } else { BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 968b3583c3..cfd0d95c77 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -211,6 +211,10 @@ namespace { SDValue &Index, SDValue &Disp, SDValue &Segment, SDValue &NodeWithChain); + // @LOCALMOD-BEGIN + void LegalizeAddressingModeForNaCl(SDValue N, X86ISelAddressMode &AM); + // @LOCALMOD-END + bool TryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, @@ -228,8 +232,9 @@ namespace { inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { + EVT MemOpVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; // @LOCALMOD Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ? - CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) : + CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, MemOpVT) : // @LOCALMOD AM.Base_Reg; Scale = getI8Imm(AM.Scale); Index = AM.IndexReg; @@ -289,6 +294,15 @@ namespace { const X86InstrInfo *getInstrInfo() const { return getTargetMachine().getInstrInfo(); } + + // @LOCALMOD-START + bool selectingMemOp; + bool RestrictUseOfBaseReg() { + return selectingMemOp && Subtarget->isTargetNaCl64(); + } + // @LOCALMOD-END + + }; } @@ -444,8 +458,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && - // Only does this when target favors doesn't favor register indirect - // call. + !Subtarget->isTargetNaCl() && // @LOCALMOD: We can't fold load/call ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) || (N->getOpcode() == X86ISD::TC_RETURN && // Only does this if load can be folded into TC_RETURN. @@ -593,6 +606,18 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && !isDispSafeForFrameIndex(Val)) return true; + // LOCALMOD-BEGIN + // Do not fold large offsets into displacements. + // Various constant folding and address-mode selections can result in + // 32-bit operations (e.g. 
from GEP) getting folded into the displacement + // and often results in a negative value in the index register + // (see also LegalizeAddressModeForNaCl) + else if (Subtarget->isTargetNaCl64() && + (AM.BaseType == X86ISelAddressMode::RegBase || + AM.BaseType == X86ISelAddressMode::FrameIndexBase) && + (Val > 65535 || Val < -65536) && selectingMemOp) + return true; + // LOCALMOD-END } AM.Disp = Val; return false; @@ -602,6 +627,14 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); + // @LOCALMOD-START + // Disable this tls access optimization in Native Client, since + // gs:0 (or fs:0 on X86-64) does not exactly contain its own address. + if (Subtarget->isTargetNaCl()) { + return true; + } + // @LOCALMOD-END + // load gs:0 -> GS segment register. // load fs:0 -> FS segment register. // @@ -726,6 +759,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { if (MatchAddressRecursively(N, AM, 0)) return true; + + if (!RestrictUseOfBaseReg()) { // @LOCALMOD // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has // a smaller encoding and avoids a scaled-index. if (AM.Scale == 2 && @@ -734,7 +769,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { AM.Base_Reg = AM.IndexReg; AM.Scale = 1; } - + } // @LOCALMOD + // Post-processing: Convert foo to foo(%rip), even in non-PIC mode, // because it has a smaller encoding. // TODO: Which other code models can use this? @@ -1081,6 +1117,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // FALL THROUGH case ISD::MUL: case X86ISD::MUL_IMM: + // @LOCALMOD + if (!RestrictUseOfBaseReg()) { // X*[3,5,9] -> X+X*[2,4,8] if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() == 0 && @@ -1113,6 +1151,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; } } + } // @LOCALMOD break; case ISD::SUB: { @@ -1199,6 +1238,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, return false; AM = Backup; + if (!RestrictUseOfBaseReg()) { // @LOCALMOD // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least // the add. @@ -1211,6 +1251,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM.Scale = 1; return false; } + } // @LOCALMOD N = Handle.getValue(); break; } @@ -1270,7 +1311,15 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the /// specified addressing mode without any further recursion. bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { - // Is the base register already occupied? + if (RestrictUseOfBaseReg()) { // @LOCALMOD + if (AM.IndexReg.getNode() == 0) { + AM.IndexReg = N; + AM.Scale = 1; + return false; + } + return true; + } // @LOCALMOD +// Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. 
if (AM.IndexReg.getNode() == 0) { @@ -1300,6 +1349,8 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; + // @LOCALMOD + selectingMemOp = true; if (Parent && // This list of opcodes are all the nodes that have an "addr:$ptr" operand @@ -1321,7 +1372,14 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, if (MatchAddress(N, AM)) return false; - EVT VT = N.getValueType(); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) { + LegalizeAddressingModeForNaCl(N, AM); + } + // @LOCALMOD-END + + EVT VT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; // @LOCALMOD + if (AM.BaseType == X86ISelAddressMode::RegBase) { if (!AM.Base_Reg.getNode()) AM.Base_Reg = CurDAG->getRegister(0, VT); @@ -1331,6 +1389,32 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, AM.IndexReg = CurDAG->getRegister(0, VT); getAddressOperands(AM, Base, Scale, Index, Disp, Segment); + + // @LOCALMOD-BEGIN + // For Native Client 64-bit, zero-extend 32-bit pointers + // to 64-bits for memory operations. Most of the time, this + // won't generate any additional instructions because the backend + // knows that operations on 32-bit registers implicitly zero-extends. + // If we don't do this, there are a few corner cases where LLVM might + // assume the upper bits won't be modified or used, but since we + // always clear the upper bits, this is not a good assumption. + // http://code.google.com/p/nativeclient/issues/detail?id=1564 + if (Subtarget->isTargetNaCl64()) { + assert(Base.getValueType() == MVT::i64 && "Unexpected base operand size"); + + if (Index.getValueType() != MVT::i64) { + Index = CurDAG->getZExtOrTrunc(Index, Index.getDebugLoc(), MVT::i64); + // Insert the new node into the topological ordering. + if (Parent && + (Index->getNodeId() == -1 || + Index->getNodeId() > Parent->getNodeId())) { + CurDAG->RepositionNode(Parent, Index.getNode()); + Index->setNodeId(Parent->getNodeId()); + } + } + } + // @LOCALMOD-END + return true; } @@ -1393,6 +1477,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, SDValue Copy = AM.Segment; SDValue T = CurDAG->getRegister(0, MVT::i32); AM.Segment = T; + // @LOCALMOD + selectingMemOp = false; if (MatchAddress(N, AM)) return false; assert (T == AM.Segment); @@ -1456,7 +1542,8 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, AM.Base_Reg = CurDAG->getRegister(0, N.getValueType()); AM.SymbolFlags = GA->getTargetFlags(); - if (N.getValueType() == MVT::i32) { + if (N.getValueType() == MVT::i32 && + !Subtarget->isTargetNaCl64()) { // @LOCALMOD AM.Scale = 1; AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); } else { @@ -1481,6 +1568,145 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, N.getOperand(1), Base, Scale, Index, Disp, Segment); } +// @LOCALMOD-BEGIN +// LegalizeAddressingModeForNaCl - NaCl specific addressing fixes. This ensures +// two addressing mode invariants. +// +// case 1. Addressing using only a displacement (constant address references) +// is only legal when the displacement is positive. This is because, when +// later we replace +// movl 0xffffffff, %eax +// by +// movl 0xffffffff(%r15), %eax +// the displacement becomes a negative offset from %r15, making this a +// reference to the guard region below %r15 rather than to %r15 + 4GB - 1, +// as the programmer expected. 
To handle these cases we pull negative +// displacements out whenever there is no base or index register in the +// addressing mode. I.e., the above becomes +// movl $0xffffffff, %ebx +// movl %rbx, %rbx +// movl (%r15, %rbx, 1), %eax +// +// case 2. Because NaCl needs to zero the top 32-bits of the index, we can't +// allow the index register to be negative. However, if we are using a base +// frame index, global address or the constant pool, and AM.Disp > 0, then +// negative values of "index" may be expected to legally occur. +// To avoid this, we fold the displacement (and scale) back into the +// index. This results in a LEA before the current instruction. +// Unfortunately, this may add a requirement for an additional register. +// +// For example, this sandboxed code is broken if %eax is negative: +// +// movl %eax,%eax +// incl -30(%rbp,%rax,4) +// +// Instead, we now generate: +// leal -30(%rbp,%rax,4), %tmp +// movl %tmp,%tmp +// incl (%r15,%tmp,1) +// +// TODO(espindola): This might not be complete since the matcher can select +// any dag node to go in the index. This is also not how the rest of the +// matcher logic works, if the matcher selects something, it must be +// valid and not depend on further patching. A more desirable fix is +// probably to update the matching code to avoid assigning a register +// to a value that we cannot prove is positive. +// +// Note: Any changes to the testing logic need to be synchronized +// with the implementation of isLegalAddressingModeForNaCl() in +// X86FastISel.cpp. +void X86DAGToDAGISel::LegalizeAddressingModeForNaCl(SDValue N, + X86ISelAddressMode &AM) { + + + // RIP-relative addressing is always fine. + if (AM.isRIPRelative()) + return; + + DebugLoc dl = N->getDebugLoc(); + // Case 1 above: + if (!AM.hasBaseOrIndexReg() && !AM.hasSymbolicDisplacement() && AM.Disp < 0) { + SDValue Imm = CurDAG->getTargetConstant(AM.Disp, MVT::i32); + SDValue MovNode = + SDValue(CurDAG->getMachineNode(X86::MOV32ri, dl, MVT::i32, Imm), 0); + AM.IndexReg = MovNode; + AM.Disp = 0; + InsertDAGNode(*CurDAG, N, MovNode); + return; + } + + // MatchAddress wants to use the base register when there's only + // one register and no scale. We need to use the index register instead. 
+ if (AM.BaseType == X86ISelAddressMode::RegBase && + AM.Base_Reg.getNode() && + !AM.IndexReg.getNode()) { + AM.IndexReg = AM.Base_Reg; + AM.setBaseReg(SDValue()); + } + + // Case 2 above comprises two sub-cases: + // sub-case 1: Prevent negative indexes + bool NeedsFixing1 = + (AM.BaseType == X86ISelAddressMode::FrameIndexBase || AM.GV || AM.CP) && + AM.IndexReg.getNode() && + AM.Disp > 0; + + // sub-case 2: Both index and base registers are being used + bool NeedsFixing2 = + (AM.BaseType == X86ISelAddressMode::RegBase) && + AM.Base_Reg.getNode() && + AM.IndexReg.getNode(); + + if (!NeedsFixing1 && !NeedsFixing2) + return; + + static const unsigned LogTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; + assert(AM.Scale < sizeof(LogTable)/sizeof(LogTable[0])); + unsigned ScaleLog = LogTable[AM.Scale]; + assert(ScaleLog <= 3); + SmallVector<SDNode*, 8> NewNodes; + + SDValue NewIndex = AM.IndexReg; + if (ScaleLog > 0) { + SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8); + NewNodes.push_back(ShlCount.getNode()); + SDValue ShlNode = CurDAG->getNode(ISD::SHL, dl, N.getValueType(), + NewIndex, ShlCount); + NewNodes.push_back(ShlNode.getNode()); + NewIndex = ShlNode; + } + if (AM.Disp > 0) { + SDValue DispNode = CurDAG->getConstant(AM.Disp, N.getValueType()); + NewNodes.push_back(DispNode.getNode()); + + SDValue AddNode = CurDAG->getNode(ISD::ADD, dl, N.getValueType(), + NewIndex, DispNode); + NewNodes.push_back(AddNode.getNode()); + NewIndex = AddNode; + } + + if (NeedsFixing2) { + SDValue AddBase = CurDAG->getNode(ISD::ADD, dl, N.getValueType(), + NewIndex, AM.Base_Reg); + NewNodes.push_back(AddBase.getNode()); + NewIndex = AddBase; + AM.setBaseReg(SDValue()); + } + AM.Disp = 0; + AM.Scale = 1; + AM.IndexReg = NewIndex; + + // Insert the new nodes into the topological ordering. + for (unsigned i=0; i < NewNodes.size(); i++) { + if (NewNodes[i]->getNodeId() == -1 || + NewNodes[i]->getNodeId() > N.getNode()->getNodeId()) { + CurDAG->RepositionNode(N.getNode(), NewNodes[i]); + NewNodes[i]->setNodeId(N.getNode()->getNodeId()); + } + } +} +// @LOCALMOD-END + /// getGlobalBaseReg - Return an SDNode that returns the value of /// the global base register. Output instructions required to /// initialize the global base register, if necessary. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f69f5d85f7..28802217fa 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -151,6 +151,12 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { if (Subtarget->isTargetLinux()) return new X86LinuxTargetObjectFile(); + + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) + return new TargetLoweringObjectFileNaCl(); + // @LOCALMOD-END + if (Subtarget->isTargetELF()) return new TargetLoweringObjectFileELF(); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) @@ -163,6 +169,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); + // @LOCALMOD-START + X86StackPtr = Subtarget->has64BitPointers() ? 
X86::RSP : X86::ESP; + // @LOCALMOD-END + RegInfo = TM.getRegisterInfo(); TD = getDataLayout(); @@ -202,7 +212,7 @@ void X86TargetLowering::resetOperationActions() { setSchedulingPreference(Sched::ILP); else setSchedulingPreference(Sched::RegPressure); - setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); + setStackPointerRegisterToSaveRestore(X86StackPtr); // @LOCALMOD // Bypass expensive divides on Atom when compiling with O2 if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) { @@ -566,7 +576,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - if (Subtarget->is64Bit()) { + if (Subtarget->has64BitPointers()) { setExceptionPointerRegister(X86::RAX); setExceptionSelectorRegister(X86::RDX); } else { @@ -597,13 +607,16 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Custom); else if (TM.Options.EnableSegmentedStacks) - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Custom); else - setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ? + setOperationAction(ISD::DYNAMIC_STACKALLOC, + Subtarget->has64BitPointers() ? // @LOCALMOD MVT::i64 : MVT::i32, Expand); if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) { @@ -1343,6 +1356,14 @@ void X86TargetLowering::resetOperationActions() { setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + setOperationAction(ISD::NACL_TP_TLS_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TP_TDB_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::NACL_TARGET_ARCH, MVT::i32, Custom); + } + // @LOCALMOD-END + computeRegisterProperties(); // On Darwin, -Os means optimize for size without hurting performance, @@ -1691,7 +1712,16 @@ X86TargetLowering::LowerReturn(SDValue Chain, unsigned RetValReg = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? X86::RAX : X86::EAX; - Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); + unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX; + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // NaCl 64 uses 32-bit pointers, so there might be some zero-ext needed. + SDValue Zext = DAG.getZExtOrTrunc(Val, dl, MVT::i64); + Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Zext, Flag); + } else { + Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); + } + // @LOCALMOD-END Flag = Chain.getValue(1); // RAX/EAX now acts like a return value. 
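As a minimal standalone sketch (not the LLVM code itself) of the invariant the LowerReturn @LOCALMOD above enforces: under NaCl's x86-64 ILP32 model a returned pointer is only 32 bits wide, so it is zero-extended before being copied into RAX, leaving the upper 32 bits clear. The function name and types below are illustrative only.

#include <cstdint>

// Illustrative only: mirrors the effect of DAG.getZExtOrTrunc(Val, dl, MVT::i64)
// followed by the copy into RAX in the @LOCALMOD block above.
uint64_t SandboxedPointerReturn(uint32_t Ptr32) {
  // Zero-extension guarantees the upper 32 bits of the 64-bit return
  // register are clear, as the NaCl sandbox expects.
  return static_cast<uint64_t>(Ptr32);
}
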
@@ -2427,7 +2457,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (!IsSibcall && (!isTailCall || isByVal)) { assert(VA.isMemLoc()); if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, // @LOCALMOD getPointerTy()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, dl, DAG, VA, Flags)); @@ -2517,7 +2547,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset()); if (StackPtr.getNode() == 0) StackPtr = DAG.getCopyFromReg(Chain, dl, - RegInfo->getStackRegister(), + X86StackPtr, // @LOCALMOD getPointerTy()); Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source); @@ -3130,7 +3160,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { FuncInfo->setRAIndex(ReturnAddrIndex); } - return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); + return DAG.getFrameIndex(ReturnAddrIndex, // @LOCALMOD + Subtarget->is64Bit() ? MVT::i64 : MVT::i32); } bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, @@ -7624,7 +7655,8 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { static SDValue GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, - unsigned char OperandFlags, bool LocalDynamic = false) { + unsigned char OperandFlags, bool LocalDynamic = false, + unsigned Opcode = ISD::DELETED_NODE) { // @LOCALMOD MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); DebugLoc dl = GA->getDebugLoc(); @@ -7633,8 +7665,15 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, GA->getOffset(), OperandFlags); - X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR - : X86ISD::TLSADDR; + // @LOCALMOD - changed type for casting + unsigned CallType = LocalDynamic ? X86ISD::TLSBASEADDR + : X86ISD::TLSADDR; + + // @LOCALMOD-START + // If Opcode was explicitly overridden, use it as the call type. + if (Opcode != ISD::DELETED_NODE) + CallType = Opcode; + // @LOCALMOD-END if (InFlag) { SDValue Ops[] = { Chain, TGA, *InFlag }; @@ -7673,6 +7712,52 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, X86::RAX, X86II::MO_TLSGD); } +// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model. +static SDValue +LowerToTLSExecCall(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model, bool is64Bit) { + + // See: http://code.google.com/p/nativeclient/issues/detail?id=1685 + unsigned char TargetFlag; + unsigned Opcode; + if (model == TLSModel::LocalExec) { + TargetFlag = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; + Opcode = X86ISD::TLSADDR_LE; + } else if (model == TLSModel::InitialExec) { + TargetFlag = is64Bit ? X86II::MO_GOTTPOFF : X86II::MO_INDNTPOFF; + Opcode = X86ISD::TLSADDR_IE; + } else { + llvm_unreachable("Unknown TLS model"); + } + + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::EAX, // PtrVT is 32-bit. + TargetFlag, false, Opcode); +} + +// @LOCALMOD-START +// Lower TLS accesses to a function call, rather than use segment registers. +// Lower ISD::GlobalTLSAddress for NaCl 64 bit. 
+static SDValue +LowerToTLSNaCl64(GlobalAddressSDNode *GA, SelectionDAG &DAG, + const EVT PtrVT, TLSModel::Model model) { + + // See: http://code.google.com/p/nativeclient/issues/detail?id=1685 + unsigned char TargetFlag; + unsigned Opcode; + if (model == TLSModel::GeneralDynamic || model == TLSModel::LocalDynamic) { + TargetFlag = X86II::MO_TLSGD; + Opcode = X86ISD::TLSADDR; + } else { + return LowerToTLSExecCall(GA, DAG, PtrVT, model, true); + } + + return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, + X86::EAX, // PtrVT is 32-bit. + TargetFlag, false, Opcode); +} +// @LOCALMOD-END + static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, const EVT PtrVT, @@ -7778,6 +7863,11 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->isTargetELF()) { TLSModel::Model model = getTargetMachine().getTLSModel(GV); + // @LOCALMOD-START + if (Subtarget->isTargetNaCl64()) + return LowerToTLSNaCl64(GA, DAG, getPointerTy(), model); + // @LOCALMOD-END + switch (model) { case TLSModel::GeneralDynamic: if (Subtarget->is64Bit()) @@ -7788,9 +7878,16 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + // @LOCALMOD-START + if (llvm::TLSUseCall && Subtarget->isTargetNaCl()) { + return LowerToTLSExecCall(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); + } else { + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), - getTargetMachine().getRelocationModel() == Reloc::PIC_); + getTargetMachine().getRelocationModel() == Reloc::PIC_); + } + // @LOCALMOD-END } llvm_unreachable("Unknown TLS model."); } @@ -8895,13 +8992,31 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, break; } + // @LOCALMOD-BEGIN + // This function only peeks at the data dependencies of the DAG to find + // an arith op that also defines EFLAGS. However, function calls may + // clobber EFLAGS and the data dependencies do not show that. + // When that occurs, EFLAGS must be copied via PUSHF and POPF. + // The problem is that NaCl does not allow PUSHF and POPF. + // We could try to detect such clobbers for NaCl, but for now, we + // keep this code simple, and bail out for NaCl. A further + // PeepholeOptimizer pass can do a similar optimization + // (see optimizeCompareInstr in X86InstrInfo.cpp), so it's not *so* + // bad. This function also converts "add op, -1" to DEC, which can + // help fold load/stores: + // (store m, (add (load m), -1)) -> (dec m) + // So we lose out on that. + // BUG=http://code.google.com/p/nativeclient/issues/detail?id=2711 + bool ConservativeForNaCl = Subtarget->isTargetNaCl(); + // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. - if (Op.getResNo() != 0 || NeedOF || NeedCF) + if (Op.getResNo() != 0 || NeedOF || NeedCF || ConservativeForNaCl) // Emit a CMP with 0, which is the TEST pattern. return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); + // @LOCALMOD-END unsigned Opcode = 0; unsigned NumOperands = 0; @@ -9136,6 +9251,10 @@ static bool isAllOnes(SDValue V) { /// if it's possible. 
SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) const { + // @LOCALMOD: NaCl validator rejects BT, BTS, and BTC. + if (Subtarget->isTargetNaCl()) + return SDValue(); + SDValue Op0 = And.getOperand(0); SDValue Op1 = And.getOperand(1); if (Op0.getOpcode() == ISD::TRUNCATE) @@ -10066,14 +10185,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Size = Op.getOperand(1); // FIXME: Ensure alignment here - bool Is64Bit = Subtarget->is64Bit(); - EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32; + bool Has64BitPointers = Subtarget->has64BitPointers(); // @LOCALMOD + EVT SPTy = Has64BitPointers ? MVT::i64 : MVT::i32; // @LOCALMOD if (getTargetMachine().Options.EnableSegmentedStacks) { MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (Is64Bit) { + if (Subtarget->is64Bit()) { // @LOCALMOD // The 64 bit implementation of segmented stacks needs to clobber both r10 // r11. This makes it impossible to use it along with nested parameters. const Function *F = MF.getFunction(); @@ -10086,7 +10205,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, } const TargetRegisterClass *AddrRegClass = - getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32); + getRegClassFor(Has64BitPointers ? MVT::i64:MVT::i32); // @LOCALMOD unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, @@ -10095,7 +10214,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return DAG.getMergeValues(Ops1, 2, dl); } else { SDValue Flag; - unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); + unsigned Reg = (Has64BitPointers ? X86::RAX : X86::EAX); // @LOCALMOD Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); @@ -10104,7 +10223,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); Flag = Chain.getValue(1); - Chain = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), + Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, // @LOCALMOD SPTy).getValue(1); SDValue Ops1[2] = { Chain.getValue(0), Chain }; @@ -10133,6 +10252,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // fp_offset (48 - 48 + 8 * 16) // overflow_arg_area (point to parameters coming in memory). // reg_save_area + unsigned PointerSize = TD->getPointerSize(0); // @LOCALMOD SmallVector<SDValue, 8> MemOps; SDValue FIN = Op.getOperand(1); // Store gp_offset @@ -10163,11 +10283,12 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { // Store ptr to reg_save_area. 
FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), - FIN, DAG.getIntPtrConstant(8)); + FIN, DAG.getIntPtrConstant(PointerSize)); // @LOCALMOD SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, - MachinePointerInfo(SV, 16), false, false, 0); + MachinePointerInfo(SV, 8+PointerSize), // @LOCALMOD + false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], MemOps.size()); @@ -10177,7 +10298,8 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); assert((Subtarget->isTargetLinux() || - Subtarget->isTargetDarwin()) && + Subtarget->isTargetDarwin() || + Subtarget->isTargetNaCl()) && // @LOCALMOD "Unhandled target in LowerVAARG"); assert(Op.getNode()->getNumOperands() == 4); SDValue Chain = Op.getOperand(0); @@ -10253,12 +10375,57 @@ static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, DebugLoc DL = Op.getDebugLoc(); return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, - DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, + // @LOCALMOD-START + // Size is actually 8 + 2 * pointer size and align + // is the pointer ABI alignment but we don't have a + // pointer to TD in this static function + DAG.getIntPtrConstant(Subtarget->has64BitPointers() ? + 24 : 16), + Subtarget->has64BitPointers() ? 8 : 4, + /*isVolatile*/false, + // @LOCALMOD-END false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } -// getTargetVShiftNode - Handle vector element shifts where the shift amount +////////////////////////////////////////////////////////////////////// +// NaCl TLS setup / layout intrinsics. +// See: native_client/src/untrusted/stubs/tls_params.h +SDValue X86TargetLowering::LowerNaClTpTlsOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tls_offset (size_t tls_size) { + // return -tls_size; + // } + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::SUB, dl, Op.getValueType().getSimpleVT(), + DAG.getConstant(0, Op.getValueType().getSimpleVT()), + Op.getOperand(0)); +} + +SDValue X86TargetLowering::LowerNaClTpTdbOffset(SDValue Op, + SelectionDAG &DAG) const { + // ssize_t __nacl_tp_tdb_offset (size_t tdb_size) { + // return 0; + // } + return DAG.getConstant(0, Op.getValueType().getSimpleVT()); +} + +SDValue +X86TargetLowering::LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const { + // int __nacl_target_arch () { + // return (is_64_bit ? + // PnaclTargetArchitectureX86_64 : + // PnaclTargetArchitectureX86_32); + // } + return DAG.getConstant((Subtarget->is64Bit() ? + PnaclTargetArchitectureX86_64 : + PnaclTargetArchitectureX86_32), + Op.getValueType().getSimpleVT()); +} + +////////////////////////////////////////////////////////////////////// + +// getTargetVShiftNOde - Handle vector element shifts where the shift amount // may or may not be a constant. Takes immediate version of shift as input. 
static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, SDValue SrcOp, SDValue ShAmt, @@ -10303,11 +10470,37 @@ static SDValue getTargetVShiftNode(unsigned Opc, DebugLoc dl, EVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { +SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. + + // @LOCALMOD-BEGIN + case Intrinsic::nacl_read_tp: { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + if (Subtarget->is64Bit() || llvm::TLSUseCall) { + // Call __nacl_read_tp() to get the thread pointer. + unsigned PtrSize = PtrVT.getSizeInBits(); + IntegerType *PtrTy = Type::getIntNTy(*DAG.getContext(), PtrSize); + SDValue ReadTpFunction = DAG.getExternalSymbol("__nacl_read_tp", PtrVT); + ArgListTy Args; + TargetLowering::CallLoweringInfo CLI( + DAG.getEntryNode(), PtrTy, + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*doesNotRet=*/false, + /*isReturnValueUsed=*/true, + ReadTpFunction, Args, DAG, dl); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + return CallResult.first; + } else { + // Get %gs:0, which contains the thread pointer on x86-32. + return DAG.getNode(X86ISD::THREAD_POINTER_FROM_GS, dl, PtrVT); + } + } + // @LOCALMOD-END + // Comparison intrinsics. case Intrinsic::x86_sse_comieq_ss: case Intrinsic::x86_sse_comilt_ss: @@ -11053,13 +11246,16 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Handler = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); + + // @LOCALMOD-START + bool Has64BitPointers = Subtarget->has64BitPointers(); EVT PtrVT = getPointerTy(); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && "Invalid Frame Register!"); SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); - unsigned StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; + unsigned StoreAddrReg = (PtrVT == MVT::i64 && Has64BitPointers) ? X86::RCX : X86::ECX; SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, DAG.getIntPtrConstant(RegInfo->getSlotSize())); @@ -12428,6 +12624,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUB: return LowerSUB(Op, DAG); case ISD::SDIV: return LowerSDIV(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); + // @LOCALMOD-BEGIN + case ISD::NACL_TP_TLS_OFFSET: return LowerNaClTpTlsOffset(Op, DAG); + case ISD::NACL_TP_TDB_OFFSET: return LowerNaClTpTdbOffset(Op, DAG); + case ISD::NACL_TARGET_ARCH: return LowerNaClTargetArch(Op, DAG); + // @LOCALMOD-END } } @@ -13400,7 +13601,6 @@ X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, // Copy PhyReg back to virtual register. 
BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3) .addReg(PhyReg); - BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); mainMBB->addSuccessor(origMainMBB); @@ -13845,9 +14045,11 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); // Machine Information + bool IsNaCl = Subtarget->isTargetNaCl(); // @LOCALMOD const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); + const TargetRegisterClass *AddrRegClass = + getRegClassFor(getPointerTy()); // @LOCALMOD const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); DebugLoc DL = MI->getDebugLoc(); @@ -13956,29 +14158,39 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( } // In offsetMBB, emit code to use the reg_save_area. + unsigned Opc; // @LOCALMOD if (offsetMBB) { assert(OffsetReg != 0); // Read the reg_save_area address. unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg) + Opc = IsNaCl ? X86::MOV32rm : X86::MOV64rm; // @LOCALMOD + BuildMI(offsetMBB, DL, TII->get(Opc), RegSaveReg) // @LOCALMOD .addOperand(Base) .addOperand(Scale) .addOperand(Index) - .addDisp(Disp, 16) + .addDisp(Disp, 8+TD->getPointerSize(0)) // @LOCALMOD .addOperand(Segment) .setMemRefs(MMOBegin, MMOEnd); // Zero-extend the offset - unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); - BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) - .addImm(0) - .addReg(OffsetReg) - .addImm(X86::sub_32bit); + // @LOCALMOD-BEGIN + unsigned OffsetRegExt; + if (IsNaCl) { + OffsetRegExt = OffsetReg; + } else { + OffsetRegExt = MRI.createVirtualRegister(AddrRegClass); + BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetRegExt) + .addImm(0) + .addReg(OffsetReg) + .addImm(X86::sub_32bit); + } + // @LOCALMOD-END // Add the offset to the reg_save_area to get the final address. - BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) - .addReg(OffsetReg64) + Opc = IsNaCl ? X86::ADD32rr : X86::ADD64rr; // @LOCALMOD + BuildMI(offsetMBB, DL, TII->get(Opc), OffsetDestReg) + .addReg(OffsetRegExt) // @LOCALMOD .addReg(RegSaveReg); // Compute the offset for the next argument @@ -14008,7 +14220,8 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( // Load the overflow_area address into a register. unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg) + Opc = IsNaCl ? X86::MOV32rm : X86::MOV64rm; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), OverflowAddrReg) .addOperand(Base) .addOperand(Scale) .addOperand(Index) @@ -14024,11 +14237,13 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass); // aligned_addr = (addr + (align-1)) & ~(align-1) - BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg) + Opc = IsNaCl ? X86::ADD32ri : X86::ADD64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), TmpReg) .addReg(OverflowAddrReg) .addImm(Align-1); - BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg) + Opc = IsNaCl ? 
X86::AND32ri : X86::AND64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), OverflowDestReg) .addReg(TmpReg) .addImm(~(uint64_t)(Align-1)); } else { @@ -14039,12 +14254,14 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( // Compute the next overflow address after this argument. // (the overflow address should be kept 8-byte aligned) unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass); - BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg) + Opc = IsNaCl ? X86::ADD32ri : X86::ADD64ri32; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc), NextAddrReg) .addReg(OverflowDestReg) .addImm(ArgSizeA8); // Store the new overflow address. - BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr)) + Opc = IsNaCl ? X86::MOV32mr : X86::MOV64mr; // @LOCALMOD + BuildMI(overflowMBB, DL, TII->get(Opc)) .addOperand(Base) .addOperand(Scale) .addOperand(Index) @@ -14419,6 +14636,25 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, return BB; } +// @LOCALMOD-BEGIN +MachineBasicBlock * +X86TargetLowering::EmitLoweredThreadPointerFromGs(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + // This generates "movl %gs:0, %DEST", which fetches the thread + // pointer on x86-32. + BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), MI->getOperand(0).getReg()) + .addReg(/*Base=*/0) + .addImm(/*Scale=*/1) + .addReg(/*IndexReg=*/0) + .addImm(/*Disp=*/0) + .addReg(/*Segment=*/X86::GS); + MI->eraseFromParent(); + return BB; +} +// @LOCALMOD-END + MachineBasicBlock * X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -14694,6 +14930,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitLoweredSegAlloca(MI, BB, false); case X86::SEG_ALLOCA_64: return EmitLoweredSegAlloca(MI, BB, true); + // @LOCALMOD-BEGIN + case X86::THREAD_POINTER_FROM_GS: + return EmitLoweredThreadPointerFromGs(MI, BB); + // @LOCALMOD-END case X86::TLSCall_32: case X86::TLSCall_64: return EmitLoweredTLSCall(MI, BB); @@ -14890,6 +15130,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); case X86::VAARG_64: + case X86::NACL_CG_VAARG_64: return EmitVAARG64WithCustomInserter(MI, BB); case X86::EH_SjLj_SetJmp32: @@ -16648,6 +16889,12 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, } unsigned Bits = VT.getSizeInBits(); + // @LOCALMOD-START + // Due to a limitation in NaCl's 32-bit validator, + // 16-bit shld instructions are illegal in 32-bit NaCl. + if (Subtarget->isTargetNaCl() && !Subtarget->is64Bit() && Bits == 16) + return SDValue(); + // @LOCALMOD-END if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { @@ -16952,7 +17199,6 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, DebugLoc dl = St->getDebugLoc(); SDValue StoredVal = St->getOperand(1); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - // If we are saving a concatenation of two XMM registers, perform two stores. // On Sandy Bridge, 256-bit memory operations are executed by two // 128-bit ports. 
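For reference, a minimal sketch of the va_list record that the VASTART/VACOPY/VAARG changes above index into: with NaCl x86-64's 4-byte pointers, overflow_arg_area sits at offset 8, reg_save_area at offset 8 + 4 = 12, and the whole record is 8 + 2*4 = 16 bytes, matching the size and alignment LowerVACOPY now copies. The struct name and field types are illustrative assumptions, not definitions taken from the patch.

#include <cstdint>

// Illustrative layout only (inferred from the offsets used above).
struct NaClX8664VAList {
  uint32_t gp_offset;           // offset 0
  uint32_t fp_offset;           // offset 4
  uint32_t overflow_arg_area;   // offset 8: 32-bit pointer under ILP32
  uint32_t reg_save_area;       // offset 8 + pointer size = 12
};
static_assert(sizeof(NaClX8664VAList) == 16,
              "8 + 2 * pointer size, as used by LowerVACOPY above");
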
However, on Haswell it is better to issue a single 256-bit diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 2727e220d3..320f4d3248 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -220,6 +220,16 @@ namespace llvm { // TLSBASEADDR - Thread Local Storage. A call to get the start address // of the TLS block for the current module. TLSBASEADDR, + // @LOCALMOD-BEGIN + // TLSADDR_LE - Thread Local Storage. (Local Exec Model) + TLSADDR_LE, + + // TLSADDR_IE - Thread Local Storage. (Initial Exec Model) + TLSADDR_IE, + + // THREAD_POINTER_FROM_GS - Read thread pointer from %gs:0 on x86-32. + THREAD_POINTER_FROM_GS, + // @LOCALMOD-END // TLSCALL - Thread Local Storage. When calling to an OS provided // thunk at the address from an earlier relocation. @@ -473,6 +483,7 @@ namespace llvm { //===--------------------------------------------------------------------===// // X86TargetLowering - X86 Implementation of the TargetLowering interface class X86TargetLowering : public TargetLowering { + public: explicit X86TargetLowering(X86TargetMachine &TM); @@ -736,6 +747,9 @@ namespace llvm { const X86Subtarget *Subtarget; const X86RegisterInfo *RegInfo; const DataLayout *TD; + // @LOCALMOD - This is essentially a revert of r167104 + /// X86StackPtr - X86 physical register used as stack ptr. + unsigned X86StackPtr; /// Used to store the TargetOptions so that we don't waste time resetting /// the operation actions unless we have to. @@ -839,6 +853,7 @@ namespace llvm { SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const; @@ -852,11 +867,18 @@ namespace llvm { SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-BEGIN + SDValue LowerNaClTpTlsOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTpTdbOffset(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerNaClTargetArch(SDValue Op, SelectionDAG &DAG) const; + // @LOCALMOD-END + + // Utility functions to help LowerVECTOR_SHUFFLE & LowerBUILD_VECTOR SDValue LowerVectorBroadcast(SDValue Op, SelectionDAG &DAG) const; SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerVectorAllZeroTest(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVectorIntExtend(SDValue Op, SelectionDAG &DAG) const; @@ -923,6 +945,12 @@ namespace llvm { MachineBasicBlock *BB, bool Is64Bit) const; + // @LOCALMOD-BEGIN + MachineBasicBlock *EmitLoweredThreadPointerFromGs( + MachineInstr *MI, + MachineBasicBlock *BB) const; + // @LOCALMOD-END + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index d9ff0c63c5..402ebdb726 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -92,8 +92,8 @@ def VAARG_64 : I<0, Pseudo, "#VAARG_64 $dst, $ap, $size, $mode, $align", [(set GR64:$dst, (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), - (implicit EFLAGS)]>; - + (implicit 
EFLAGS)]>, + Requires<[IsNotNaCl]>; // Dynamic stack allocation yields a _chkstk or _alloca call for all Windows // targets. These calls are needed to probe the stack when allocating more than // 4k bytes in one go. Touching the stack at 4K increments is necessary to @@ -404,7 +404,7 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_addr32", [(X86tlsaddr tls32addr:$sym)]>, - Requires<[In32BitMode]>; + Requires<[In32BitMode, IsNotNaCl]>; def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "# TLS_base_addr32", [(X86tlsbaseaddr tls32baseaddr:$sym)]>, @@ -430,6 +430,16 @@ def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), Requires<[In64BitMode]>; } +// @LOCALMOD-BEGIN +// NaCl TLS support +let usesCustomInserter = 1 in { + def THREAD_POINTER_FROM_GS : + I<0, Pseudo, (outs GR32:$dst), (ins), + "# get thread pointer from %gs:0", + [(set GR32:$dst, (X86thread_pointer_from_gs))]>; +} +// @LOCALMOD-END + // Darwin TLS Support // For i386, the address of the thunk is passed on the stack, on return the // address of the variable is in %eax. %ecx is trashed during the function @@ -1005,9 +1015,9 @@ def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), // Direct PC relative function call for small code model. 32-bit displacement // sign extended to 64-bit. def : Pat<(X86call (i64 tglobaladdr:$dst)), - (CALL64pcrel32 tglobaladdr:$dst)>; + (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsNotNaCl]>; def : Pat<(X86call (i64 texternalsym:$dst)), - (CALL64pcrel32 texternalsym:$dst)>; + (CALL64pcrel32 texternalsym:$dst)>, Requires<[IsNotNaCl]>; // Tailcall stuff. The TCRETURN instructions execute after the epilog, so they // can never use callee-saved registers. That is the purpose of the GR64_TC @@ -1036,7 +1046,7 @@ def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), // callee-saved register. def : Pat<(X86tcret (load addr:$dst), imm:$off), (TCRETURNmi addr:$dst, imm:$off)>, - Requires<[In32BitMode, IsNotPIC]>; + Requires<[In32BitMode, IsNotPIC, IsNotNaCl]>; def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), (TCRETURNdi texternalsym:$dst, imm:$off)>, @@ -1048,29 +1058,29 @@ def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off), (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // Don't fold loads into X86tcret requiring more than 6 regs. // There wouldn't be enough scratch registers for base+index. def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off), (TCRETURNmi64 addr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // Normal calls, with various flavors of addresses. def : Pat<(X86call (i32 tglobaladdr:$dst)), - (CALLpcrel32 tglobaladdr:$dst)>; + (CALLpcrel32 tglobaladdr:$dst)>, Requires<[IsNotNaCl]>; def : Pat<(X86call (i32 texternalsym:$dst)), - (CALLpcrel32 texternalsym:$dst)>; + (CALLpcrel32 texternalsym:$dst)>, Requires<[IsNotNaCl]>; def : Pat<(X86call (i32 imm:$dst)), - (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>; + (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr, IsNotNaCl]>; // Comparisons. 
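As a rough illustration of what the THREAD_POINTER_FROM_GS pseudo-instruction above boils down to on x86-32 (a single movl %gs:0, <reg>, per EmitLoweredThreadPointerFromGs earlier in this patch), here is a hedged, standalone GNU inline-asm equivalent; the helper name is made up for illustration.

#include <cstdint>

// Illustration only: reads the x86-32 thread pointer stored at %gs:0,
// i.e. what the THREAD_POINTER_FROM_GS pseudo expands to after lowering.
static inline uint32_t nacl_read_tp_gs0() {
  uint32_t tp;
  __asm__("movl %%gs:0, %0" : "=r"(tp));
  return tp;
}
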
@@ -1498,19 +1508,19 @@ def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)), - sub_8bit_hi))>; + sub_8bit_hi))>, Requires<[IsNotNaCl]>; def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)), sub_8bit_hi))>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst), (MOV8mr_NOREX addr:$dst, (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)), sub_8bit_hi))>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // (shl x, 1) ==> (add x, x) diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 0e696513d4..4f30cbe115 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -114,7 +114,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { Sched<[WriteJump]>; def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst", [(brind (loadi32 addr:$dst))], IIC_JMP_MEM>, - Requires<[In32BitMode]>, Sched<[WriteJumpLd]>; + Requires<[In32BitMode,IsNotNaCl]>, Sched<[WriteJumpLd]>; def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst", [(brind GR64:$dst)], IIC_JMP_REG>, Requires<[In64BitMode]>, @@ -132,7 +132,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { "ljmp{l}\t{$seg, $off|$off, $seg}", [], IIC_JMP_FAR_PTR>, Sched<[WriteJump]>; def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst), - "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, + "ljmp{q}\t{*}$dst", [], IIC_JMP_FAR_MEM>, Requires<[IsNotNaCl]>, Sched<[WriteJump]>; def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst), @@ -166,11 +166,11 @@ let isCall = 1 in Requires<[In32BitMode]>, Sched<[WriteJump]>; def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst), "call{l}\t{*}$dst", [(X86call GR32:$dst)], IIC_CALL_RI>, - Requires<[In32BitMode]>, Sched<[WriteJump]>; + Requires<[In32BitMode,IsNotNaCl]>, Sched<[WriteJump]>; // @LOCALMOD def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst), "call{l}\t{*}$dst", [(X86call (loadi32 addr:$dst))], IIC_CALL_MEM>, - Requires<[In32BitMode,FavorMemIndirectCall]>, + Requires<[In32BitMode,IsNotNaCl,FavorMemIndirectCall]>, // @LOCALMOD Sched<[WriteJumpLd]>; def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs), @@ -193,9 +193,20 @@ let isCall = 1 in let isAsmParserOnly = 1 in def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, (outs), (ins i16imm_pcrel:$dst), - "callw\t$dst", []>, OpSize; + "callw\t$dst", []>, OpSize, + Requires<[IsNotNaCl]>; // @LOCALMOD } +// @LOCALMOD-BEGIN +// These CodeGen patterns are normally part of the declaration above. +// However, we need to be able to disable these patterns for NaCl +// without disabling the the instruction itself. (so we can use the +// instruction in assembly input) +def : Pat<(X86call GR32:$dst), + (CALL32r GR32:$dst)>, Requires<[IsNotNaCl]>; +def : Pat<(X86call (loadi32 addr:$dst)), + (CALL32m addr:$dst)>, Requires<[IsNotNaCl]>; +// @LOCALMOD-END // Tail call stuff. @@ -220,7 +231,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, "", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead. 
let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst), - "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{l}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>, Requires<[IsNotNaCl]>; // @LOCALMOD } @@ -238,18 +249,18 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in { def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst), "call{q}\t$dst", [], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // @LOCALMOD def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst), "call{q}\t{*}$dst", [(X86call GR64:$dst)], IIC_CALL_RI>, - Requires<[In64BitMode]>; + Requires<[In64BitMode, IsNotNaCl]>; // @LOCALMOD def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst), "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))], IIC_CALL_MEM>, - Requires<[In64BitMode,FavorMemIndirectCall]>; + Requires<[In64BitMode, IsNotNaCl, FavorMemIndirectCall]>; def FARCALL64 : RI<0xFF, MRM3m, (outs), (ins opaque80mem:$dst), - "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>; + "lcall{q}\t{*}$dst", [], IIC_CALL_FAR_MEM>, Requires<[IsNotNaCl]>; // @LOCALMOD } let isCall = 1, isCodeGenOnly = 1 in @@ -283,5 +294,6 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, let mayLoad = 1 in def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst), - "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>; + "jmp{q}\t{*}$dst # TAILCALL", [], IIC_JMP_MEM>, + Requires<[IsNotNaCl]>; // @LOCALMOD } diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index a71e024f4e..c456bec75b 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -56,6 +56,7 @@ def MRM_DC : Format<56>; def MRM_DD : Format<57>; def MRM_DE : Format<58>; def MRM_DF : Format<59>; +def CustomFrm : Format<62>; // @LOCALMOD // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7c0423f818..dc765d4868 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -276,12 +276,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } + // @LOCALMOD-BEGIN + unsigned NoForwardForNaCl = + tm.getSubtarget<X86Subtarget>().isTargetNaCl() ? 
TB_NO_FORWARD : 0; + // @LOCALMOD-END + static const X86OpTblEntry OpTbl0[] = { { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, - { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, - { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, + { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, @@ -308,8 +313,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, - { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, - { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, + { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, @@ -348,8 +353,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, - { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, - { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, + { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD | NoForwardForNaCl }, + { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD | NoForwardForNaCl }, { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 3380d8c64e..22630f8ea0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -103,6 +103,10 @@ def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +// @LOCALMOD-BEGIN +def SDT_X86ThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +// @LOCALMOD-END + def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>; @@ -216,6 +220,17 @@ def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR, def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; +// @LOCALMOD-BEGIN +def X86tlsaddr_le : SDNode<"X86ISD::TLSADDR_LE", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def X86tlsaddr_ie : SDNode<"X86ISD::TLSADDR_IE", SDT_X86TLSADDR, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def X86thread_pointer_from_gs : + SDNode<"X86ISD::THREAD_POINTER_FROM_GS", SDT_X86ThreadPointer>; +// @LOCALMOD-END + def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; @@ -621,7 +636,7 @@ def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate<"Mode64Bit", "64-bit mode">; def IsWin64 : Predicate<"Subtarget->isTargetWin64()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; -def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">; def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">; def FarData : Predicate<"TM.getCodeModel() != 
CodeModel::Small &&" @@ -1796,6 +1811,12 @@ let Predicates = [HasBMI2] in { //===----------------------------------------------------------------------===// include "X86InstrArithmetic.td" + +//===----------------------------------------------------------------------===// +// NaCl support (@LOCALMOD) +//===----------------------------------------------------------------------===// + +include "X86InstrNaCl.td" include "X86InstrCMovSetCC.td" include "X86InstrExtension.td" include "X86InstrControl.td" diff --git a/lib/Target/X86/X86InstrNaCl.td b/lib/Target/X86/X86InstrNaCl.td new file mode 100644 index 0000000000..8a7eebecd7 --- /dev/null +++ b/lib/Target/X86/X86InstrNaCl.td @@ -0,0 +1,357 @@ +//====- X86InstrNaCl.td - Describe NaCl Instructions ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the modifications to the X86 instruction set needed for +// Native Client code generation. +// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// NaCl specific DAG Nodes. +// + +//===----------------------------------------------------------------------===// +// +// Native Client Pseudo-Instructions +// +// These instructions implement the Native Client pseudo-instructions, such +// as nacljmp and naclasp. +// +// TableGen and MC consider these to be "real" instructions. They can be +// parsed by the AsmParser and emitted by the AsmStreamer as if they +// were just regular instructions. They are not marked "Pseudo" because +// this would imply isCodeGenOnly=1, which would stop them from being +// parsed by the assembler. +// +// These instructions cannot be encoded (written into an object file) by the +// MCCodeEmitter. Instead, during direct object emission, they get lowered to +// a sequence of streamer emits. (see X86InstrNaCl.cpp) +// +// These instructions should not be used in CodeGen. They have no pattern +// and lack CodeGen metadata. Instead, the X86NaClRewritePass should +// generate these instructions after CodeGen is finished. 
+// +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// 32-bit Native Client Pseudo Instructions +//===----------------------------------------------------------------------===// + +class NaClPI32<dag outs, dag ins, string asm> + : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In32BitMode]>; + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in { + def NACL_TRAP32 : NaClPI32<(outs), (ins), "nacltrap">; +} + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in { + def NACL_RET32 : NaClPI32<(outs), (ins), "naclret">; + def NACL_RETI32 : NaClPI32<(outs), (ins i16imm:$amt), "naclreti\t$amt">; +} + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, + isAsmParserOnly = 1 in { + def NACL_JMP32r : NaClPI32<(outs), (ins GR32:$dst), "nacljmp\t$dst">; +} + +let isCall = 1, isAsmParserOnly = 1 in { + def NACL_CALL32d : NaClPI32<(outs), (ins i32imm_pcrel:$dst), + "naclcall\t$dst">; + def NACL_CALL32r : NaClPI32<(outs), (ins GR32:$dst), + "naclcall\t$dst">; +} + +// nacltlsaddr32 gets rewritten to: +// .bundle_align_end +// .bundle_lock +// leal\t$sym@TLSGD, %eax +// call\t___tls_get_addr@PLT +// .bundle_unlock +// (The linker expects the leal+call sequence to be directly adjacent) +let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP], + isAsmParserOnly = 1 in +def NACL_TLS_addr32 : NaClPI32<(outs), (ins i32mem:$sym), + "nacltlsaddr32\t$sym">; + +//===----------------------------------------------------------------------===// +// 64-bit Native Client Pseudo Instructions +//===----------------------------------------------------------------------===// + +class NaClPI64<dag outs, dag ins, string asm> + : I<0, CustomFrm, outs, ins, asm, []>, Requires<[IsNaCl, In64BitMode]>; + +let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1, isAsmParserOnly = 1 in { + def NACL_TRAP64 : NaClPI64<(outs), (ins), "nacltrap">; +} + +let isTerminator = 1, isReturn = 1, isBarrier = 1, + hasCtrlDep = 1, FPForm = SpecialFP, isAsmParserOnly = 1 in { + def NACL_RET64 : NaClPI64<(outs), (ins), "naclret">; +} + +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, + isAsmParserOnly = 1 in { + def NACL_JMP64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP), + "nacljmp\t{$dst, $rZP|$rZP, $dst}">; + def NACL_JMP64z : NaClPI64<(outs), (ins GR32:$dst), + "nacljmp\t$dst">; +} + + +let isCall = 1, isAsmParserOnly = 1 in { + def NACL_CALL64d : NaClPI64<(outs), (ins i32imm_pcrel:$dst), + "naclcall\t$dst">; + def NACL_CALL64r : NaClPI64<(outs), (ins GR32:$dst, GR64:$rZP), + "naclcall\t$dst,$rZP">; +} + +let Defs = [RSP, EFLAGS], Uses = [RSP], isAsmParserOnly = 1 in { + def NACL_ASPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP), + "naclasp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_ASPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclasp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_SSPi8 : NaClPI64<(outs), (ins i64i8imm:$off, GR64:$rZP), + "naclssp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_SSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclssp{q}\t{$off, $rZP|$rZP, $off}">; + + def NACL_ANDSPi32: NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclandsp{q}\t{$off, 
$rZP|$rZP, $off}">; +} + +let Defs = [RSP], Uses = [RBP], isAsmParserOnly = 1 in { + def NACL_SPADJi32 : NaClPI64<(outs), (ins i64i32imm:$off, GR64:$rZP), + "naclspadj\t{$off, $rZP|$rZP, $off}">; +} + +let Defs = [RSP], isAsmParserOnly = 1 in { + def NACL_RESTSPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP), + "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTSPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP), + "naclrestsp_noflags\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTSPrz : NaClPI64<(outs), (ins GR32:$src), + "naclrestsp_noflags\t$src">; +} + +def : MnemonicAlias<"naclrestsp", "naclrestsp_noflags">; + +let Defs = [RBP], isAsmParserOnly = 1 in { + def NACL_RESTBPr : NaClPI64<(outs), (ins GR32:$src, GR64:$rZP), + "naclrestbp\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTBPm : NaClPI64<(outs), (ins i32mem:$src, GR64:$rZP), + "naclrestbp\t{$src, $rZP|$rZP, $src}">; + def NACL_RESTBPrz : NaClPI64<(outs), (ins GR32:$src), + "naclrestbp\t$src">; +} + +//===----------------------------------------------------------------------===// +// +// Code Generator Instructions (isCodeGenOnly == 1) +// +// These instructions exists to make CodeGen work with Native Client's +// modifications. +// +// Many of these instructions exist because of limitations in CodeGen +// or TableGen, and may become unnecessary in the future. +//===----------------------------------------------------------------------===// + + +//===----------------------------------------------------------------------===// +// +// CodeGen 32-bit +// +//===----------------------------------------------------------------------===// + + +// To avoid a naming conflict between call/naclcall, we have to +// disable the real CALLpcrel32 and CALL32r instructions when targeting +// for NaCl. Thus, they need to be produced here. + +let isCall = 1 in + // All calls clobber the non-callee saved registers. ESP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in { + + def NACL_CG_CALLpcrel32 : I<0, Pseudo, + (outs), (ins i32imm_pcrel:$dst), + "naclcall\t$dst", []>, + Requires<[IsNaCl, In32BitMode]>; + def NACL_CG_CALL32r : I<0, Pseudo, + (outs), (ins GR32:$dst), + "naclcall\t$dst", [(X86call GR32:$dst)]>, + Requires<[IsNaCl, In32BitMode]>; +} + +// Normal calls, with various flavors of addresses. +def : Pat<(X86call (i32 tglobaladdr:$dst)), + (NACL_CG_CALLpcrel32 tglobaladdr:$dst)>, + Requires<[IsNaCl, In32BitMode]>; +def : Pat<(X86call (i32 texternalsym:$dst)), + (NACL_CG_CALLpcrel32 texternalsym:$dst)>, + Requires<[IsNaCl, In32BitMode]>; +def : Pat<(X86call (i32 imm:$dst)), + (NACL_CG_CALLpcrel32 imm:$dst)>, + Requires<[IsNaCl, In32BitMode, CallImmAddr]>; + +//===----------------------------------------------------------------------===// +// +// CodeGen 64-bit +// +//===----------------------------------------------------------------------===// + + +// Because pointers are 32-bit on X86-64 Native Client, we need to +// produce new versions of the JMP64/CALL64 instructions which can accept +// addresses which are i32 instead of i64. 
+ +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + def NACL_CG_JMP64r : I<0, Pseudo, (outs), (ins GR32:$dst), + "nacljmp\t$dst", + [(brind GR32:$dst)]>, + Requires<[IsNaCl, In64BitMode]>; +} + +let isCall = 1 in + // All calls clobber the non-callee saved registers. RSP is marked as + // a use to prevent stack-pointer assignments that appear immediately + // before calls from potentially appearing dead. Uses for argument + // registers are added manually. + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { + + def NACL_CG_CALL64pcrel32 : I<0, Pseudo, (outs), + (ins i32imm_pcrel:$dst), + "naclcall\t$dst", []>, + Requires<[IsNaCl, In64BitMode]>; + + def NACL_CG_CALL64r : I<0, Pseudo, (outs), (ins GR32:$dst), + "naclcall\t$dst,%r15", + [(X86call GR32:$dst)]>, + Requires<[IsNaCl, In64BitMode]>; +} + +def : Pat<(X86call (i32 tglobaladdr:$dst)), + (NACL_CG_CALL64pcrel32 tglobaladdr:$dst)>, + Requires<[IsNaCl, In64BitMode]>; +def : Pat<(X86call (i32 texternalsym:$dst)), + (NACL_CG_CALL64pcrel32 texternalsym:$dst)>, + Requires<[IsNaCl, In64BitMode]>; + +// Tail calls +// Also needed due to the i64 / i32 pointer problem. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, + isCodeGenOnly = 1 in + let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { + + def NACL_CG_TCRETURNdi64 : I<0, Pseudo, (outs), + (ins i32imm_pcrel:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", []>, + Requires<[IsNaCl, In64BitMode]>; + def NACL_CG_TCRETURNri64 : I<0, Pseudo, (outs), + (ins GR32_TC_64:$dst, i32imm:$offset), + "#TC_RETURN $dst $offset", []>, + Requires<[IsNaCl, In64BitMode]>; + + def NACL_CG_TAILJMPd64 : I<0, Pseudo, (outs), + (ins i32imm_pcrel:$dst), + "jmp\t$dst # TAILCALL", []>, + Requires<[IsNaCl, In64BitMode]>; + def NACL_CG_TAILJMPr64 : I<0, Pseudo, (outs), + (ins GR32_TC_64:$dst), + "nacljmp\t$dst,%r15 # TAILCALL", []>, + Requires<[IsNaCl, In64BitMode]>; +} + +def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off), + (NACL_CG_TCRETURNdi64 tglobaladdr:$dst, imm:$off)>, + Requires<[IsNaCl, In64BitMode]>; + +def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off), + (NACL_CG_TCRETURNdi64 texternalsym:$dst, imm:$off)>, + Requires<[IsNaCl, In64BitMode]>; + +def : Pat<(X86tcret GR32_TC_64:$dst, imm:$off), + (NACL_CG_TCRETURNri64 GR32_TC_64:$dst, imm:$off)>, + Requires<[IsNaCl, In64BitMode]>; + +// ELF TLS Support + +let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [ESP] in +def NACL_CG_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + ".bundle_align_end" + ".bundle_lock" + "leal\t$sym, %eax; " + "call\t___tls_get_addr@PLT" + ".bundle_unlock", + [(X86tlsaddr tls32addr:$sym)]>, + Requires<[In32BitMode, IsNaCl]>; + +// These are lowered in X86NaClRewritePass. 
+let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, + FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1, + MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, + XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], + Uses = [RSP] in { +def NACL_CG_GD_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr tls32addr:$sym)]>, + Requires<[IsNaCl, In64BitMode]>; +def NACL_CG_LE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_le tls32addr:$sym)]>, + Requires<[IsNaCl, In64BitMode]>; +def NACL_CG_IE_TLS_addr64 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_ie tls32addr:$sym)]>, + Requires<[IsNaCl, In64BitMode]>; +// For mtls-use-call. +def NACL_CG_LE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_le tls32addr:$sym)]>, + Requires<[IsNaCl, In32BitMode]>; +def NACL_CG_IE_TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "", + [(X86tlsaddr_ie tls32addr:$sym)]>, + Requires<[IsNaCl, In32BitMode]>; +} + +let usesCustomInserter = 1, Defs = [EFLAGS] in +def NACL_CG_VAARG_64 : I<0, Pseudo, + (outs GR32:$dst), + (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align), + "#NACL_VAARG_64 $dst, $ap, $size, $mode, $align", + [(set GR32:$dst, + (X86vaarg64 addr:$ap, imm:$size, imm:$mode, imm:$align)), + (implicit EFLAGS)]>, + Requires<[IsNaCl, In64BitMode]>; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index a8a9fd8acc..0a3c7423e0 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -742,7 +742,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. - OutStreamer.EmitInstruction(MCInstBuilder(X86::CALLpcrel32) + // LOCALMOD: For NaCl, the call should be aligned to the end of a bundle. Since the + // call is at the end of the bundle, there should be no padding between + // the call and the next instruction (the label should still make sense). + + OutStreamer.EmitInstruction(MCInstBuilder( + getSubtarget().isTargetNaCl() ? X86::NACL_CALL32d : X86::CALLpcrel32) // @LOCALMOD .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); // Emit the label. diff --git a/lib/Target/X86/X86NaClRewritePass.cpp b/lib/Target/X86/X86NaClRewritePass.cpp new file mode 100644 index 0000000000..846c72f452 --- /dev/null +++ b/lib/Target/X86/X86NaClRewritePass.cpp @@ -0,0 +1,762 @@ +//=== X86NaClRewritePAss.cpp - Rewrite instructions for NaCl SFI --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that ensures stores and loads and stack/frame +// pointer addresses are within the NaCl sandbox (for x86-64). +// It also ensures that indirect control flow follows NaCl requirments. +// +// The other major portion of rewriting for NaCl is done in X86InstrNaCl.cpp, +// which is responsible for expanding the NaCl-specific operations introduced +// here and also the intrinsic functions to support setjmp, etc. 
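// As a rough illustration of the x86-64 memory sandboxing set up here (the
// final form is produced by the later expansion, so this is a sketch only):
//   movl %eax, 12(%rbx)
// is turned into something like
//   movl %ebx, %ebx                  # clear the upper 32 address bits
//   movl %eax, 12(%r15,%rbx)         # %r15 holds the sandbox base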
+//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "x86-sandboxing" + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +extern cl::opt<bool> FlagUseZeroBasedSandbox; +cl::opt<bool> FlagRestrictR15("sfi-restrict-r15", + cl::desc("Restrict use of %r15. This flag can" + " be turned off for the zero-based" + " sandbox model."), + cl::init(true)); + +namespace { + class X86NaClRewritePass : public MachineFunctionPass { + public: + static char ID; + X86NaClRewritePass() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "NaCl Rewrites"; + } + + private: + + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const X86Subtarget *Subtarget; + bool Is64Bit; + + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + + void TraceLog(const char *func, + const MachineBasicBlock &MBB, + const MachineBasicBlock::iterator MBBI) const; + + bool ApplyRewrites(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + bool ApplyStackSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool ApplyMemorySFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool ApplyFrameSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool ApplyControlSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + + bool AlignJumpTableTargets(MachineFunction &MF); + }; + + char X86NaClRewritePass::ID = 0; + +} + +static void DumpInstructionVerbose(const MachineInstr &MI) { + DEBUG({ + dbgs() << MI; + dbgs() << MI.getNumOperands() << " operands:" << "\n"; + for (unsigned i = 0; i < MI.getNumOperands(); ++i) { + const MachineOperand& op = MI.getOperand(i); + dbgs() << " " << i << "(" << op.getType() << "):" << op << "\n"; + } + dbgs() << "\n"; + }); +} + +static bool IsPushPop(MachineInstr &MI) { + const unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: + return false; + case X86::PUSH64r: + case X86::POP64r: + return true; + } +} + +static bool IsStore(MachineInstr &MI) { + return MI.getDesc().mayStore(); +} + +static bool IsLoad(MachineInstr &MI) { + return MI.getDesc().mayLoad(); +} + +static bool IsFrameChange(MachineInstr &MI) { + return MI.modifiesRegister(X86::EBP, NULL) || + MI.modifiesRegister(X86::RBP, NULL); +} + +static bool IsStackChange(MachineInstr &MI) { + return MI.modifiesRegister(X86::ESP, NULL) || + MI.modifiesRegister(X86::RSP, NULL); +} + + +static bool HasControlFlow(const MachineInstr &MI) { + return MI.getDesc().isBranch() || + MI.getDesc().isCall() || + MI.getDesc().isReturn() || + MI.getDesc().isTerminator() || + MI.getDesc().isBarrier(); +} + +static bool IsDirectBranch(const MachineInstr &MI) { + return MI.getDesc().isBranch() && + !MI.getDesc().isIndirectBranch(); +} + +static bool IsRegAbsolute(unsigned Reg) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const bool RestrictR15 = FlagRestrictR15; + assert(UseZeroBasedSandbox || RestrictR15); + return (Reg == X86::RSP || Reg == X86::RBP || + (Reg == X86::R15 && 
RestrictR15)); +} + +static bool FindMemoryOperand(const MachineInstr &MI, unsigned* index) { + int NumFound = 0; + unsigned MemOp = 0; + for (unsigned i = 0; i < MI.getNumOperands(); ) { + if (isMem(&MI, i)) { + NumFound++; + MemOp = i; + i += X86::AddrNumOperands; + } else { + i++; + } + } + + // Intrinsics and other functions can have mayLoad and mayStore to reflect + // the side effects of those functions. This function is used to find + // explicit memory references in the instruction, of which there are none. + if (NumFound == 0) + return false; + + if (NumFound > 1) + llvm_unreachable("Too many memory operands in instruction!"); + + *index = MemOp; + return true; +} + +static unsigned PromoteRegTo64(unsigned RegIn) { + if (RegIn == 0) + return 0; + unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i64, false); + assert(RegOut != 0); + return RegOut; +} + +static unsigned DemoteRegTo32(unsigned RegIn) { + if (RegIn == 0) + return 0; + unsigned RegOut = getX86SubSuperRegister(RegIn, MVT::i32, false); + assert(RegOut != 0); + return RegOut; +} + + +// +// True if this MI restores RSP from RBP with a slight adjustment offset. +// +static bool MatchesSPAdj(const MachineInstr &MI) { + assert (MI.getOpcode() == X86::LEA64r && "Call to MatchesSPAdj w/ non LEA"); + const MachineOperand &DestReg = MI.getOperand(0); + const MachineOperand &BaseReg = MI.getOperand(1); + const MachineOperand &Scale = MI.getOperand(2); + const MachineOperand &IndexReg = MI.getOperand(3); + const MachineOperand &Offset = MI.getOperand(4); + return (DestReg.isReg() && DestReg.getReg() == X86::RSP && + BaseReg.isReg() && BaseReg.getReg() == X86::RBP && + Scale.getImm() == 1 && + IndexReg.isReg() && IndexReg.getReg() == 0 && + Offset.isImm()); +} + +void +X86NaClRewritePass::TraceLog(const char *func, + const MachineBasicBlock &MBB, + const MachineBasicBlock::iterator MBBI) const { + DEBUG(dbgs() << "@" << func + << "(" << MBB.getName() << ", " << (*MBBI) << ")\n"); +} + +bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + TraceLog("ApplyStackSFI", MBB, MBBI); + assert(Is64Bit); + MachineInstr &MI = *MBBI; + + if (!IsStackChange(MI)) + return false; + + if (IsPushPop(MI)) + return false; + + if (MI.getDesc().isCall()) + return false; + + unsigned Opc = MI.getOpcode(); + DebugLoc DL = MI.getDebugLoc(); + unsigned DestReg = MI.getOperand(0).getReg(); + assert(DestReg == X86::ESP || DestReg == X86::RSP); + + unsigned NewOpc = 0; + switch (Opc) { + case X86::ADD64ri8 : NewOpc = X86::NACL_ASPi8; break; + case X86::ADD64ri32: NewOpc = X86::NACL_ASPi32; break; + case X86::SUB64ri8 : NewOpc = X86::NACL_SSPi8; break; + case X86::SUB64ri32: NewOpc = X86::NACL_SSPi32; break; + case X86::AND64ri32: NewOpc = X86::NACL_ANDSPi32; break; + } + if (NewOpc) { + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addImm(MI.getOperand(2).getImm()) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + // Promote "MOV ESP, EBP" to a 64-bit move + if (Opc == X86::MOV32rr && MI.getOperand(1).getReg() == X86::EBP) { + MI.getOperand(0).setReg(X86::RSP); + MI.getOperand(1).setReg(X86::RBP); + MI.setDesc(TII->get(X86::MOV64rr)); + Opc = X86::MOV64rr; + } + + // "MOV RBP, RSP" is already safe + if (Opc == X86::MOV64rr && MI.getOperand(1).getReg() == X86::RBP) { + return true; + } + + // Promote 32-bit lea to 64-bit lea (does this ever happen?) 
+ assert(Opc != X86::LEA32r && "Invalid opcode in 64-bit mode!"); + if (Opc == X86::LEA64_32r) { + unsigned DestReg = MI.getOperand(0).getReg(); + unsigned BaseReg = MI.getOperand(1).getReg(); + unsigned Scale = MI.getOperand(2).getImm(); + unsigned IndexReg = MI.getOperand(3).getReg(); + assert(DestReg == X86::ESP); + assert(Scale == 1); + assert(BaseReg == X86::EBP); + assert(IndexReg == 0); + MI.getOperand(0).setReg(X86::RSP); + MI.getOperand(1).setReg(X86::RBP); + MI.setDesc(TII->get(X86::LEA64r)); + Opc = X86::LEA64r; + } + + if (Opc == X86::LEA64r && MatchesSPAdj(MI)) { + const MachineOperand &Offset = MI.getOperand(4); + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_SPADJi32)) + .addImm(Offset.getImm()) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) { + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPr)) + .addReg(DemoteRegTo32(MI.getOperand(1).getReg())) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + if (Opc == X86::MOV32rm) { + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPm)) + .addOperand(MI.getOperand(1)) // Base + .addOperand(MI.getOperand(2)) // Scale + .addOperand(MI.getOperand(3)) // Index + .addOperand(MI.getOperand(4)) // Offset + .addOperand(MI.getOperand(5)) // Segment + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + MI.eraseFromParent(); + return true; + } + + DEBUG(DumpInstructionVerbose(MI)); + llvm_unreachable("Unhandled Stack SFI"); +} + +bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + TraceLog("ApplyFrameSFI", MBB, MBBI); + assert(Is64Bit); + MachineInstr &MI = *MBBI; + + if (!IsFrameChange(MI)) + return false; + + unsigned Opc = MI.getOpcode(); + DebugLoc DL = MI.getDebugLoc(); + + // Handle moves to RBP + if (Opc == X86::MOV64rr) { + assert(MI.getOperand(0).getReg() == X86::RBP); + unsigned SrcReg = MI.getOperand(1).getReg(); + + // MOV RBP, RSP is already safe + if (SrcReg == X86::RSP) + return false; + + // Rewrite: mov %rbp, %rX + // To: naclrestbp %eX, %rZP + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPr)) + .addReg(DemoteRegTo32(SrcReg)) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + MI.eraseFromParent(); + return true; + } + + // Handle memory moves to RBP + if (Opc == X86::MOV64rm) { + assert(MI.getOperand(0).getReg() == X86::RBP); + + // Zero-based sandbox model uses address clipping + if (UseZeroBasedSandbox) + return false; + + // Rewrite: mov %rbp, (...) + // To: naclrestbp (...), %rZP + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm)) + .addOperand(MI.getOperand(1)) // Base + .addOperand(MI.getOperand(2)) // Scale + .addOperand(MI.getOperand(3)) // Index + .addOperand(MI.getOperand(4)) // Offset + .addOperand(MI.getOperand(5)) // Segment + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + MI.eraseFromParent(); + return true; + } + + // Popping onto RBP + // Rewrite to: + // naclrestbp (%rsp), %rZP + // naclasp $8, %rZP + // + // TODO(pdox): Consider rewriting to this instead: + // .bundle_lock + // pop %rbp + // mov %ebp,%ebp + // add %rZP, %rbp + // .bundle_unlock + if (Opc == X86::POP64r) { + assert(MI.getOperand(0).getReg() == X86::RBP); + + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPm)) + .addReg(X86::RSP) // Base + .addImm(1) // Scale + .addReg(0) // Index + .addImm(0) // Offset + .addReg(0) // Segment + .addReg(UseZeroBasedSandbox ? 
0 : X86::R15); // rZP + + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi8)) + .addImm(8) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + + MI.eraseFromParent(); + return true; + } + + DEBUG(DumpInstructionVerbose(MI)); + llvm_unreachable("Unhandled Frame SFI"); +} + +bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + TraceLog("ApplyControlSFI", MBB, MBBI); + MachineInstr &MI = *MBBI; + + if (!HasControlFlow(MI)) + return false; + + // Direct branches are OK + if (IsDirectBranch(MI)) + return false; + + DebugLoc DL = MI.getDebugLoc(); + unsigned Opc = MI.getOpcode(); + + // Rewrite indirect jump/call instructions + unsigned NewOpc = 0; + switch (Opc) { + // 32-bit + case X86::JMP32r : NewOpc = X86::NACL_JMP32r; break; + case X86::TAILJMPr : NewOpc = X86::NACL_JMP32r; break; + case X86::NACL_CG_CALL32r : NewOpc = X86::NACL_CALL32r; break; + // 64-bit + case X86::NACL_CG_JMP64r : NewOpc = X86::NACL_JMP64r; break; + case X86::NACL_CG_CALL64r : NewOpc = X86::NACL_CALL64r; break; + case X86::NACL_CG_TAILJMPr64 : NewOpc = X86::NACL_JMP64r; break; + } + if (NewOpc) { + MachineInstrBuilder NewMI = + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addOperand(MI.getOperand(0)); + if (Is64Bit) { + NewMI.addReg(UseZeroBasedSandbox ? 0 : X86::R15); + } + MI.eraseFromParent(); + return true; + } + + // EH_RETURN has a single argment which is not actually used directly. + // The argument gives the location where to reposition the stack pointer + // before returning. EmitPrologue takes care of that repositioning. + // So EH_RETURN just ultimately emits a plain "ret". + // RETI returns and pops some number of bytes from the stack. + if (Opc == X86::RET || Opc == X86::EH_RETURN || Opc == X86::EH_RETURN64 || + Opc == X86::RETI) { + // To maintain compatibility with nacl-as, for now we don't emit naclret. + // MI.setDesc(TII->get(Is64Bit ? X86::NACL_RET64 : X86::NACL_RET32)); + if (Is64Bit) { + BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), X86::RCX); + if (Opc == X86::RETI) { + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi32)) + .addOperand(MI.getOperand(0)) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + } + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP64r)) + .addReg(X86::ECX) + .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + } else { + BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX); + if (Opc == X86::RETI) { + BuildMI(MBB, MBBI, DL, TII->get(X86::ADD32ri), X86::ESP) + .addReg(X86::ESP) + .addOperand(MI.getOperand(0)); + } + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r)) + .addReg(X86::ECX); + } + MI.eraseFromParent(); + return true; + } + + // Rewrite trap + if (Opc == X86::TRAP) { + // To maintain compatibility with nacl-as, for now we don't emit nacltrap. + // MI.setDesc(TII->get(Is64Bit ? X86::NACL_TRAP64 : X86::NACL_TRAP32)); + BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32mi)) + .addReg(Is64Bit && !UseZeroBasedSandbox ? 
X86::R15 : 0) // Base + .addImm(1) // Scale + .addReg(0) // Index + .addImm(0) // Offset + .addReg(0) // Segment + .addImm(0); // Value + MI.eraseFromParent(); + return true; + } + + DEBUG(DumpInstructionVerbose(MI)); + llvm_unreachable("Unhandled Control SFI"); +} + +// +// Sandboxes loads and stores (64-bit only) +// +bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + TraceLog("ApplyMemorySFI", MBB, MBBI); + assert(Is64Bit); + MachineInstr &MI = *MBBI; + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + + if (!IsLoad(MI) && !IsStore(MI)) + return false; + + if (IsPushPop(MI)) + return false; + + unsigned MemOp; + if (!FindMemoryOperand(MI, &MemOp)) + return false; + assert(isMem(&MI, MemOp)); + MachineOperand &BaseReg = MI.getOperand(MemOp + 0); + MachineOperand &Scale = MI.getOperand(MemOp + 1); + MachineOperand &IndexReg = MI.getOperand(MemOp + 2); + //MachineOperand &Disp = MI.getOperand(MemOp + 3); + MachineOperand &SegmentReg = MI.getOperand(MemOp + 4); + + // RIP-relative addressing is safe. + if (BaseReg.getReg() == X86::RIP) + return false; + + // Make sure the base and index are 64-bit registers. + IndexReg.setReg(PromoteRegTo64(IndexReg.getReg())); + BaseReg.setReg(PromoteRegTo64(BaseReg.getReg())); + assert(IndexReg.getSubReg() == 0); + assert(BaseReg.getSubReg() == 0); + + bool AbsoluteBase = IsRegAbsolute(BaseReg.getReg()); + bool AbsoluteIndex = IsRegAbsolute(IndexReg.getReg()); + unsigned AddrReg = 0; + + if (AbsoluteBase && AbsoluteIndex) { + llvm_unreachable("Unexpected absolute register pair"); + } else if (AbsoluteBase) { + AddrReg = IndexReg.getReg(); + } else if (AbsoluteIndex) { + assert(!BaseReg.getReg() && "Unexpected base register"); + assert(Scale.getImm() == 1); + AddrReg = 0; + } else { + if (!BaseReg.getReg()) { + // No base, fill in relative. + BaseReg.setReg(UseZeroBasedSandbox ? 0 : X86::R15); + AddrReg = IndexReg.getReg(); + } else if (!UseZeroBasedSandbox) { + // Switch base and index registers if index register is undefined. + // That is do conversions like "mov d(%r,0,0) -> mov d(%r15, %r, 1)". + assert (!IndexReg.getReg() + && "Unexpected index and base register"); + IndexReg.setReg(BaseReg.getReg()); + Scale.setImm(1); + BaseReg.setReg(X86::R15); + AddrReg = IndexReg.getReg(); + } else { + llvm_unreachable( + "Unexpected index and base register"); + } + } + + if (AddrReg) { + assert(!SegmentReg.getReg() && "Unexpected segment register"); + SegmentReg.setReg(X86::PSEUDO_NACL_SEG); + return true; + } + + return false; +} + +bool X86NaClRewritePass::ApplyRewrites(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + unsigned Opc = MI.getOpcode(); + + // These direct jumps need their opcode rewritten + // and variable operands removed. 
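  // (For example, a direct "naclcall foo" ultimately survives as a plain
  // "call foo" that is padded to end exactly on a 32-byte bundle boundary,
  // so the return address is bundle-aligned; see X86InstrNaCl.cpp.)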
+ unsigned NewOpc = 0; + switch (Opc) { + case X86::NACL_CG_CALLpcrel32 : NewOpc = X86::NACL_CALL32d; break; + case X86::TAILJMPd : NewOpc = X86::JMP_4; break; + case X86::NACL_CG_TAILJMPd64 : NewOpc = X86::JMP_4; break; + case X86::NACL_CG_CALL64pcrel32: NewOpc = X86::NACL_CALL64d; break; + } + if (NewOpc) { + BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) + .addOperand(MI.getOperand(0)); + MI.eraseFromParent(); + return true; + } + + if (Opc == X86::NACL_CG_TLS_addr32) { + // Rewrite to nacltlsaddr32 + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_TLS_addr32)) + .addOperand(MI.getOperand(0)) // Base + .addOperand(MI.getOperand(1)) // Scale + .addOperand(MI.getOperand(2)) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, X86II::MO_TLSGD) + .addOperand(MI.getOperand(4)); // Segment + MI.eraseFromParent(); + return true; + } + + // General Dynamic NaCl TLS model + // http://code.google.com/p/nativeclient/issues/detail?id=1685 + if (Opc == X86::NACL_CG_GD_TLS_addr64) { + + // Rewrite to: + // leaq $sym@TLSGD(%rip), %rdi + // naclcall __tls_get_addr@PLT + BuildMI(MBB, MBBI, DL, TII->get(X86::LEA64r), X86::RDI) + .addReg(X86::RIP) // Base + .addImm(1) // Scale + .addReg(0) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); // Segment + BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_CALL64d)) + .addExternalSymbol("__tls_get_addr", X86II::MO_PLT); + MI.eraseFromParent(); + return true; + } + + // Local Exec NaCl TLS Model + if (Opc == X86::NACL_CG_LE_TLS_addr64 || + Opc == X86::NACL_CG_LE_TLS_addr32) { + unsigned CallOpc, LeaOpc, Reg; + // Rewrite to: + // naclcall __nacl_read_tp@PLT + // lea $sym@flag(,%reg), %reg + if (Opc == X86::NACL_CG_LE_TLS_addr64) { + CallOpc = X86::NACL_CALL64d; + LeaOpc = X86::LEA64r; + Reg = X86::RAX; + } else { + CallOpc = X86::NACL_CALL32d; + LeaOpc = X86::LEA32r; + Reg = X86::EAX; + } + BuildMI(MBB, MBBI, DL, TII->get(CallOpc)) + .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT); + BuildMI(MBB, MBBI, DL, TII->get(LeaOpc), Reg) + .addReg(0) // Base + .addImm(1) // Scale + .addReg(Reg) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); // Segment + MI.eraseFromParent(); + return true; + } + + // Initial Exec NaCl TLS Model + if (Opc == X86::NACL_CG_IE_TLS_addr64 || + Opc == X86::NACL_CG_IE_TLS_addr32) { + unsigned CallOpc, AddOpc, Base, Reg; + // Rewrite to: + // naclcall __nacl_read_tp@PLT + // addq sym@flag(%base), %reg + if (Opc == X86::NACL_CG_IE_TLS_addr64) { + CallOpc = X86::NACL_CALL64d; + AddOpc = X86::ADD64rm; + Base = X86::RIP; + Reg = X86::RAX; + } else { + CallOpc = X86::NACL_CALL32d; + AddOpc = X86::ADD32rm; + Base = MI.getOperand(3).getTargetFlags() == X86II::MO_INDNTPOFF ? + 0 : X86::EBX; // EBX for GOTNTPOFF. 
+ Reg = X86::EAX; + } + BuildMI(MBB, MBBI, DL, TII->get(CallOpc)) + .addExternalSymbol("__nacl_read_tp", X86II::MO_PLT); + BuildMI(MBB, MBBI, DL, TII->get(AddOpc), Reg) + .addReg(Reg) + .addReg(Base) + .addImm(1) // Scale + .addReg(0) // Index + .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, + MI.getOperand(3).getTargetFlags()) + .addReg(0); // Segment + MI.eraseFromParent(); + return true; + } + + return false; +} + +bool X86NaClRewritePass::AlignJumpTableTargets(MachineFunction &MF) { + bool Modified = true; + + MF.setAlignment(5); // log2, 32 = 2^5 + + MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + if (JTI != NULL) { + const std::vector<MachineJumpTableEntry> &JT = JTI->getJumpTables(); + for (unsigned i = 0; i < JT.size(); ++i) { + const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs; + for (unsigned j = 0; j < MBBs.size(); ++j) { + MBBs[j]->setAlignment(5); + Modified |= true; + } + } + } + return Modified; +} + +bool X86NaClRewritePass::runOnMachineFunction(MachineFunction &MF) { + bool Modified = false; + + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + TRI = TM->getRegisterInfo(); + Subtarget = &TM->getSubtarget<X86Subtarget>(); + Is64Bit = Subtarget->is64Bit(); + + assert(Subtarget->isTargetNaCl() && "Unexpected target in NaClRewritePass!"); + + DEBUG(dbgs() << "*************** NaCl Rewrite Pass ***************\n"); + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); + MFI != E; + ++MFI) { + Modified |= runOnMachineBasicBlock(*MFI); + } + Modified |= AlignJumpTableTargets(MF); + DEBUG(dbgs() << "*************** NaCl Rewrite DONE ***************\n"); + return Modified; +} + +bool X86NaClRewritePass::runOnMachineBasicBlock(MachineBasicBlock &MBB) { + bool Modified = false; + if (MBB.hasAddressTaken()) { + //FIXME: use a symbolic constant or get this value from some configuration + MBB.setAlignment(5); + Modified = true; + } + for (MachineBasicBlock::iterator MBBI = MBB.begin(), NextMBBI = MBBI; + MBBI != MBB.end(); MBBI = NextMBBI) { + ++NextMBBI; + // When one of these methods makes a change, + // it returns true, skipping the others. + if (ApplyRewrites(MBB, MBBI) || + (Is64Bit && ApplyStackSFI(MBB, MBBI)) || + (Is64Bit && ApplyMemorySFI(MBB, MBBI)) || + (Is64Bit && ApplyFrameSFI(MBB, MBBI)) || + ApplyControlSFI(MBB, MBBI)) { + Modified = true; + } + } + return Modified; +} + +/// createX86NaClRewritePassPass - returns an instance of the pass. 
+namespace llvm { + FunctionPass* createX86NaClRewritePass() { + return new X86NaClRewritePass(); + } +} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 16886e432d..bab08b69df 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -54,6 +54,11 @@ static cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); +// @LOCALMOD-BEGIN +extern cl::opt<bool> FlagUseZeroBasedSandbox; +extern cl::opt<bool> FlagRestrictR15; +// @LOCALMOD-END + X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit() @@ -261,9 +266,17 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } bool CallsEHReturn = MF->getMMI().callsEHReturn(); + bool IsNaCl = TM.getSubtarget<X86Subtarget>().isTargetNaCl(); // @LOCALMOD if (Is64Bit) { if (IsWin64) return CSR_Win64_SaveList; + // @LOCALMOD-BEGIN + if (IsNaCl) { + if (CallsEHReturn) + return CSR_NaCl64EHRet_SaveList; + return CSR_NaCl64_SaveList; + } + // @LOCALMOD-END if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; @@ -379,6 +392,25 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } + // @LOCALMOD-START + const X86Subtarget& Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); + const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const bool RestrictR15 = FlagRestrictR15; + assert(UseZeroBasedSandbox || RestrictR15); + if (Subtarget.isTargetNaCl64()) { + if (RestrictR15) { + Reserved.set(X86::R15); + Reserved.set(X86::R15D); + Reserved.set(X86::R15W); + Reserved.set(X86::R15B); + } + Reserved.set(X86::RBP); + Reserved.set(X86::EBP); + Reserved.set(X86::BP); + Reserved.set(X86::BPL); + } + // @LOCALMOD-END + return Reserved; } @@ -649,6 +681,9 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R14D; case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: return X86::R15D; + // @LOCALMOD. TODO: possibly revert this after LEA .td fixes + case X86::EIP: case X86::RIP: + return X86::EIP; } case MVT::i64: switch (Reg) { @@ -685,6 +720,9 @@ unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, return X86::R14; case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: return X86::R15; + // @LOCALMOD. 
TODO: possibly revert this after LEA .td fixes + case X86::EIP: case X86::RIP: + return X86::RIP; } } } diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index be6282a643..f3bfe9b328 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -270,6 +270,9 @@ def CR15 : X86Reg<"cr15", 15>; // Pseudo index registers def EIZ : X86Reg<"eiz", 4>; def RIZ : X86Reg<"riz", 4>; + +def PSEUDO_NACL_SEG : X86Reg<"nacl", 4>; // @LOCALMOD + //===----------------------------------------------------------------------===// @@ -336,6 +339,10 @@ def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)>; def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)>; def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)>; def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; +// @LOCALMOD-START +def GR32_TC_64: RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, ESI, EDI, + R8D, R9D, R11D)>; +// @LOCALMOD-END def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index f934fdd859..0eb3099ecc 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -35,6 +35,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // TODO: Can we allow this optimization for Native Client? + // At the very least, pointer size needs to be fixed below. + return SDValue(); + } + // @LOCALMOD-END + // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) return SDValue(); @@ -190,6 +198,13 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) return SDValue(); + // @LOCALMOD-BEGIN + if (Subtarget->isTargetNaCl()) { + // TODO(pdox): Allow use of the NaCl pseudo-instruction for REP MOV + return SDValue(); + } + // @LOCALMOD-END + /// If not DWORD aligned, it is more efficient to call the library. However /// if calling the library is not allowed (AlwaysInline), then soldier on as /// the code generated here is better than the long load-store sequence we diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 74da2a929c..c6b5008f66 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -167,7 +167,15 @@ bool X86Subtarget::hasSinCos() const { bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { if (In64BitMode) return false; - return isTargetELF() || TM.getRelocationModel() == Reloc::Static; + // @LOCALMOD-BEGIN + // BUG= http://code.google.com/p/nativeclient/issues/detail?id=2367 + // For NaCl dynamic linking we do not want to generate a text relocation to + // an absolute address in PIC mode. Such a situation arises from + // test/CodeGen/X86/call-imm.ll with the default implementation. + // For other platforms we retain the default behavior. 
+ return (isTargetELF() && !isTargetNaCl()) || + TM.getRelocationModel() == Reloc::Static; + // @LOCALMOD-END } static bool OSHasAVXSupport() { @@ -448,10 +456,11 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { "64-bit code requested on a subtarget that doesn't support it!"); // Stack alignment is 16 bytes on Darwin, Linux and Solaris (both - // 32 and 64 bit) and for all 64-bit targets. + // 32 and 64 bit), NaCl and for all 64-bit targets. if (StackAlignOverride) stackAlignment = StackAlignOverride; else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() || + isTargetNaCl() || // @LOCALMOD In64BitMode) stackAlignment = 16; } diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 66832b989b..273fd40904 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -236,6 +236,9 @@ public: return In64BitMode && (TargetTriple.getEnvironment() != Triple::GNUX32); } + // @LOCALMOD + bool has64BitPointers() const { return is64Bit() && !isTargetNaCl(); } + PICStyles::Style getPICStyle() const { return PICStyle; } void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 00fa47f80b..e057352a09 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -43,8 +43,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT, getSubtargetImpl()->isTargetWindows()) ? "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-" "n8:16:32-S32" : - "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-" - "n8:16:32-S128"), + getSubtargetImpl()->isTargetNaCl() ? // @LOCALMOD + "e-p:32:32-s:32-f64:64:64-f32:32:32-f80:128:128-i64:64:64-n8:16:32-S128" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32-S128"), InstrInfo(*this), TLInfo(*this), TSInfo(*this), @@ -59,12 +60,13 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true), - // The x32 ABI dictates the ILP32 programming model for x64. - DL(getSubtargetImpl()->isTarget64BitILP32() ? - "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" - "n8:16:32:64-S128" : - "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" - "n8:16:32:64-S128"), + DL(getSubtargetImpl()->isTargetNaCl() ? // @LOCALMOD + "e-p:32:32-s:64-f64:64:64-f32:32:32-f80:128:128-i64:64:64-" + "n8:16:32:64-S128" : (getSubtargetImpl()->isTarget64BitILP32() ? 
+ "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128" : + "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-" + "n8:16:32:64-S128")), InstrInfo(*this), TLInfo(*this), TSInfo(*this), @@ -221,6 +223,13 @@ bool X86PassConfig::addPreEmitPass() { ShouldPrint = true; } + // @LOCALMOD-START + if (getX86Subtarget().isTargetNaCl()) { + addPass(createX86NaClRewritePass()); + ShouldPrint = true; + } + // @LOCALMOD-END + return ShouldPrint; } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 871dacd6a1..3347449cb7 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "X86TargetObjectFile.h" +#include "X86Subtarget.h" // @LOCALMOD #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" @@ -47,3 +48,30 @@ X86LinuxTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { TargetLoweringObjectFileELF::Initialize(Ctx, TM); InitializeELF(TM.Options.UseInitArray); } + +// @LOCALMOD-START +// NOTE: this was largely lifted from +// lib/Target/ARM/ARMTargetObjectFile.cpp +// +// The default is .ctors/.dtors while the arm backend uses +// .init_array/.fini_array +// +// Without this the linker defined symbols __fini_array_start and +// __fini_array_end do not have useful values. c.f.: +// http://code.google.com/p/nativeclient/issues/detail?id=805 +void TargetLoweringObjectFileNaCl::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + + StaticCtorSection = + getContext().getELFSection(".init_array", ELF::SHT_INIT_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); + StaticDtorSection = + getContext().getELFSection(".fini_array", ELF::SHT_FINI_ARRAY, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getDataRel()); +} +// @LOCALMOD-END diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 9d26d389d4..09cb6237b9 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -38,6 +38,13 @@ namespace llvm { virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); }; + // @LOCALMOD-BEGIN + class TargetLoweringObjectFileNaCl : public TargetLoweringObjectFileELF { + public: + virtual void Initialize(MCContext &ctx, const TargetMachine &TM); + }; + // @LOCALMOD-END + } // end namespace llvm #endif diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index 2bb6e90590..328bc13cdd 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -6,3 +6,4 @@ add_subdirectory(IPO) add_subdirectory(Vectorize) add_subdirectory(Hello) add_subdirectory(ObjCARC) +add_subdirectory(NaCl) diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index fa3d72ddcf..b5a05a7f1c 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -58,6 +58,15 @@ namespace { continue; if (I->getName() == "llvm.global_ctors") continue; + // @LOCALMOD-BEGIN - this is likely upstreamable + // Note: there will likely be more cases once this + // is exercises more thorougly. 
+ if (I->getName() == "llvm.global_dtors") + continue; + // not observed yet + if (I->hasExternalWeakLinkage()) + continue; + // @LOCALMOD-END } bool Local = I->isDiscardableIfUnused(); @@ -78,6 +87,13 @@ namespace { if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; + // @LOCALMOD-BEGIN - this is likely upstreamable + // Note: there will likely be more cases once this + // is exercises more thorougly. + // observed for pthread_cancel + if (I->hasExternalWeakLinkage()) + continue; + // @LOCALMOD-END } bool Local = I->isDiscardableIfUnused(); diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 0ef900e2b9..6cab6ed0ff 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -83,6 +83,9 @@ namespace { const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + // @LOCALMOD: see usage below + bool IsUserEntryPointMain(const Function *Func); + DataLayout *TD; TargetLibraryInfo *TLI; }; @@ -1933,6 +1936,17 @@ bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, return ProcessInternalGlobal(GV, GVI, PHIUsers, GS); } +bool GlobalOpt::IsUserEntryPointMain(const Function *Func) { // @LOCALMOD + if (Func->hasOneUse() && Func->getName() == "main") { + const User *FuncUser = Func->use_back(); + if (const CallInst *CallUser = dyn_cast<CallInst>(FuncUser)) { + const Function *Caller = CallUser->getParent()->getParent(); + return Caller->getName() == "_start"; + } + } + return false; +} + /// ProcessInternalGlobal - Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, @@ -1951,9 +1965,16 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!GS.HasMultipleAccessingFunctions && GS.AccessingFunction && !GS.HasNonInstructionUser && GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && + // @LOCALMOD-BEGIN + // The upstream LLVM is looking for an external "main" here. Since in + // stable PNaCl bitcode, "main" is internal, we're using a different + // heuristic. We're looking for a "main" that is only used in a single + // place -- a call from "_start". + // TODO: figure out a more proper solution upstream and merge that in. + IsUserEntryPointMain(GS.AccessingFunction) && + // @LOCALMOD-END GV->getType()->getAddressSpace() == 0) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); Instruction &FirstI = const_cast<Instruction&>(*GS.AccessingFunction ->getEntryBlock().begin()); diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 4c252c03d0..518a8323b6 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -232,7 +232,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV, Constant *Init = GV->getInitializer(); if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init)) return 0; - + uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays. @@ -1406,6 +1406,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, // smaller constant, which will be target friendly. 
unsigned Amt = ShAmt->getLimitedValue(TypeBits-1); if (LHSI->hasOneUse() && + // @LOCALMOD-BEGIN + // We don't want to introduce non-power-of-two integer sizes for PNaCl's + // stable wire format, so modify this transformation for NaCl. + isPowerOf2_32(TypeBits - Amt) && (TypeBits - Amt) >= 8 && + // @LOCALMOD-END Amt != 0 && RHSV.countTrailingZeros() >= Amt) { Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt); Constant *NCI = ConstantExpr::getTrunc( @@ -2017,13 +2022,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // // sum = a + b // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 - { - ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI - if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) - if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) - return Res; + // @LOCALMOD-BEGIN + // This is disabled for PNaCl, because we don't support the + // with.overflow intrinsics in PNaCl's stable ABI. + if (0) { + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; } + // @LOCALMOD-END // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) if (I.isEquality() && CI->isZero() && @@ -2687,21 +2696,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); } - // (a+b) <u a --> llvm.uadd.with.overflow. - // (a+b) <u b --> llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && - (Op1 == A || Op1 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) - return R; - - // a >u (a+b) --> llvm.uadd.with.overflow. - // b >u (a+b) --> llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op1, m_Add(m_Value(A), m_Value(B))) && - (Op0 == A || Op0 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) - return R; + // @LOCALMOD-BEGIN + // This is disabled for PNaCl, because we don't support the + // with.overflow intrinsics in PNaCl's stable ABI. + if (0) { + // (a+b) <u a --> llvm.uadd.with.overflow. + // (a+b) <u b --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; + + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. 
+ if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; + } + // @LOCALMOD-END } if (I.isEquality()) { diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index 15e9fba0a7..3594de54a2 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC +subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC NaCl [component_0] type = Group diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index c390517d07..c2a2a6b485 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -8,7 +8,12 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC +PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC NaCl + + +ifeq ($(NACL_SANDBOX),1) + PARALLEL_DIRS := $(filter-out Hello, $(PARALLEL_DIRS)) +endif include $(LEVEL)/Makefile.config diff --git a/lib/Transforms/NaCl/AddPNaClExternalDecls.cpp b/lib/Transforms/NaCl/AddPNaClExternalDecls.cpp new file mode 100644 index 0000000000..f96db09b2f --- /dev/null +++ b/lib/Transforms/NaCl/AddPNaClExternalDecls.cpp @@ -0,0 +1,71 @@ +//===- AddPNaClExternalDecls.cpp - Add decls for PNaCl external functions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds function declarations for external functions used by PNaCl. +// These externals are implemented in native libraries and calls to them are +// created as part of the translation process. +// +// Running this pass is a precondition for running ResolvePNaClIntrinsics. They +// are separate because one is a ModulePass and the other is a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a module pass because it adds declarations to the module. + class AddPNaClExternalDecls : public ModulePass { + public: + static char ID; + AddPNaClExternalDecls() : ModulePass(ID) { + initializeAddPNaClExternalDeclsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +bool AddPNaClExternalDecls::runOnModule(Module &M) { + // Add declarations for a pre-defined set of external functions to the module. + // The function names must match the functions implemented in native code (in + // pnacl/support). The function types must match the types of the LLVM + // intrinsics. + // We expect these declarations not to exist in the module before this pass + // runs, but don't assert it; it will be handled by the ABI verifier. 
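  // In IR terms, the declarations added below amount to:
  //   declare i32 @setjmp(i8*)
  //   declare void @longjmp(i8*, i32)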
+ LLVMContext &C = M.getContext(); + M.getOrInsertFunction("setjmp", + // return type + Type::getInt32Ty(C), + // arguments + Type::getInt8Ty(C)->getPointerTo(), + NULL); + M.getOrInsertFunction("longjmp", + // return type + Type::getVoidTy(C), + // arguments + Type::getInt8Ty(C)->getPointerTo(), + Type::getInt32Ty(C), + NULL); + return true; +} + +char AddPNaClExternalDecls::ID = 0; +INITIALIZE_PASS(AddPNaClExternalDecls, "add-pnacl-external-decls", + "Add declarations of external functions used by PNaCl", + false, false) + +ModulePass *llvm::createAddPNaClExternalDeclsPass() { + return new AddPNaClExternalDecls(); +} diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt new file mode 100644 index 0000000000..9cf164926b --- /dev/null +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -0,0 +1,29 @@ +add_llvm_library(LLVMNaClTransforms + AddPNaClExternalDecls.cpp + CanonicalizeMemIntrinsics.cpp + ExpandArithWithOverflow.cpp + ExpandByVal.cpp + ExpandConstantExpr.cpp + ExpandCtors.cpp + ExpandGetElementPtr.cpp + ExpandSmallArguments.cpp + ExpandStructRegs.cpp + ExpandTls.cpp + ExpandTlsConstantExpr.cpp + ExpandUtils.cpp + ExpandVarArgs.cpp + InsertDivideCheck.cpp + FlattenGlobals.cpp + GlobalCleanup.cpp + PNaClABISimplify.cpp + PromoteI1Ops.cpp + PromoteIntegers.cpp + ReplacePtrsWithInts.cpp + RewriteLLVMIntrinsics.cpp + ResolvePNaClIntrinsics.cpp + RewritePNaClLibraryCalls.cpp + StripAttributes.cpp + StripMetadata.cpp + ) + +add_dependencies(LLVMNaClTransforms intrinsics_gen) diff --git a/lib/Transforms/NaCl/CanonicalizeMemIntrinsics.cpp b/lib/Transforms/NaCl/CanonicalizeMemIntrinsics.cpp new file mode 100644 index 0000000000..fd44c65434 --- /dev/null +++ b/lib/Transforms/NaCl/CanonicalizeMemIntrinsics.cpp @@ -0,0 +1,95 @@ +//===- CanonicalizeMemIntrinsics.cpp - Make memcpy's "len" arg consistent--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass canonicalizes uses of the llvm.memset, llvm.memcpy and +// llvm.memmove intrinsics so that the variants with 64-bit "len" +// arguments aren't used, and the 32-bit variants are used instead. +// +// This means the PNaCl translator won't need to handle two versions +// of each of these intrinsics, and it won't need to do any implicit +// truncations from 64-bit to 32-bit. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because that makes it easier to find all + // uses of intrinsics efficiently. 
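  // A sketch of the intended rewrite (the trailing align/volatile operands of
  // the intrinsic are left untouched):
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i32 1, i1 false)
  // becomes
  //   %mem_len_truncate = trunc i64 %len to i32
  //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %d, i8* %s, i32 %mem_len_truncate, i32 1, i1 false)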
+ class CanonicalizeMemIntrinsics : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + CanonicalizeMemIntrinsics() : ModulePass(ID) { + initializeCanonicalizeMemIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char CanonicalizeMemIntrinsics::ID = 0; +INITIALIZE_PASS(CanonicalizeMemIntrinsics, "canonicalize-mem-intrinsics", + "Make memcpy() et al's \"len\" argument consistent", + false, false) + +static bool expandIntrinsic(Module *M, Intrinsic::ID ID) { + SmallVector<Type *, 3> Types; + Types.push_back(Type::getInt8PtrTy(M->getContext())); + if (ID != Intrinsic::memset) + Types.push_back(Type::getInt8PtrTy(M->getContext())); + unsigned LengthTypePos = Types.size(); + Types.push_back(Type::getInt64Ty(M->getContext())); + + std::string OldName = Intrinsic::getName(ID, Types); + Function *OldIntrinsic = M->getFunction(OldName); + if (!OldIntrinsic) + return false; + + Types[LengthTypePos] = Type::getInt32Ty(M->getContext()); + Function *NewIntrinsic = Intrinsic::getDeclaration(M, ID, Types); + + for (Value::use_iterator CallIter = OldIntrinsic->use_begin(), + E = OldIntrinsic->use_end(); CallIter != E; ) { + CallInst *Call = dyn_cast<CallInst>(*CallIter++); + if (!Call) { + report_fatal_error("CanonicalizeMemIntrinsics: Taking the address of an " + "intrinsic is not allowed: " + OldName); + } + // This temporarily leaves Call non-well-typed. + Call->setCalledFunction(NewIntrinsic); + // Truncate the "len" argument. No overflow check. + IRBuilder<> Builder(Call); + Value *Length = Builder.CreateTrunc(Call->getArgOperand(2), + Type::getInt32Ty(M->getContext()), + "mem_len_truncate"); + Call->setArgOperand(2, Length); + } + OldIntrinsic->eraseFromParent(); + return true; +} + +bool CanonicalizeMemIntrinsics::runOnModule(Module &M) { + bool Changed = false; + Changed |= expandIntrinsic(&M, Intrinsic::memset); + Changed |= expandIntrinsic(&M, Intrinsic::memcpy); + Changed |= expandIntrinsic(&M, Intrinsic::memmove); + return Changed; +} + +ModulePass *llvm::createCanonicalizeMemIntrinsicsPass() { + return new CanonicalizeMemIntrinsics(); +} diff --git a/lib/Transforms/NaCl/ExpandArithWithOverflow.cpp b/lib/Transforms/NaCl/ExpandArithWithOverflow.cpp new file mode 100644 index 0000000000..7113c839d2 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandArithWithOverflow.cpp @@ -0,0 +1,149 @@ +//===- ExpandArithWithOverflow.cpp - Expand out uses of *.with.overflow----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The llvm.*.with.overflow.*() intrinsics are awkward for PNaCl +// support because they return structs, and we want to omit struct +// types from IR in PNaCl's stable ABI. +// +// However, llvm.{umul,uadd}.with.overflow.*() are used by Clang to +// implement an overflow check for C++'s new[] operator. This pass +// expands out these uses so that PNaCl does not have to support +// *.with.overflow as part of PNaCl's stable ABI. +// +// This pass only handles adding/multiplying by a constant, which is +// the only use of *.with.overflow that is currently generated by +// Clang (unless '-ftrapv' is passed to Clang). +// +// X * Const overflows iff X > UINT_MAX / Const, where UINT_MAX is the +// maximum value for the integer type being used. 
+// +// Similarly, X + Const overflows iff X > UINT_MAX - Const. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that the pass can easily iterate over all + // uses of the intrinsics. + class ExpandArithWithOverflow : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandArithWithOverflow() : ModulePass(ID) { + initializeExpandArithWithOverflowPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandArithWithOverflow::ID = 0; +INITIALIZE_PASS(ExpandArithWithOverflow, "expand-arith-with-overflow", + "Expand out some uses of *.with.overflow intrinsics", + false, false) + +static uint64_t UintTypeMax(unsigned Bits) { + // Avoid doing 1 << 64 because that is undefined on a uint64_t. + if (Bits == 64) + return ~(uint64_t) 0; + return (((uint64_t) 1) << Bits) - 1; +} + +static Value *CreateInsertValue(Value *StructVal, unsigned Index, + Value *Field, Instruction *BasedOn) { + SmallVector<unsigned, 1> EVIndexes; + EVIndexes.push_back(Index); + return CopyDebug(InsertValueInst::Create( + StructVal, Field, EVIndexes, + BasedOn->getName() + ".insert", BasedOn), BasedOn); +} + +static bool ExpandOpForIntSize(Module *M, unsigned Bits, bool Mul) { + IntegerType *IntTy = IntegerType::get(M->getContext(), Bits); + SmallVector<Type *, 1> Types; + Types.push_back(IntTy); + Intrinsic::ID ID = (Mul ? Intrinsic::umul_with_overflow + : Intrinsic::uadd_with_overflow); + std::string Name = Intrinsic::getName(ID, Types); + Function *Intrinsic = M->getFunction(Name); + if (!Intrinsic) + return false; + for (Value::use_iterator CallIter = Intrinsic->use_begin(), + E = Intrinsic->use_end(); CallIter != E; ) { + CallInst *Call = dyn_cast<CallInst>(*CallIter++); + if (!Call) { + report_fatal_error("ExpandArithWithOverflow: Taking the address of a " + "*.with.overflow intrinsic is not allowed"); + } + Value *VariableArg; + ConstantInt *ConstantArg; + if (ConstantInt *C = dyn_cast<ConstantInt>(Call->getArgOperand(0))) { + VariableArg = Call->getArgOperand(1); + ConstantArg = C; + } else if (ConstantInt *C = dyn_cast<ConstantInt>(Call->getArgOperand(1))) { + VariableArg = Call->getArgOperand(0); + ConstantArg = C; + } else { + errs() << "Use: " << *Call << "\n"; + report_fatal_error("ExpandArithWithOverflow: At least one argument of " + "*.with.overflow must be a constant"); + } + + Value *ArithResult = BinaryOperator::Create( + (Mul ? Instruction::Mul : Instruction::Add), VariableArg, ConstantArg, + Call->getName() + ".arith", Call); + + uint64_t ArgMax; + if (Mul) { + ArgMax = UintTypeMax(Bits) / ConstantArg->getZExtValue(); + } else { + ArgMax = UintTypeMax(Bits) - ConstantArg->getZExtValue(); + } + Value *OverflowResult = new ICmpInst( + Call, CmpInst::ICMP_UGT, VariableArg, ConstantInt::get(IntTy, ArgMax), + Call->getName() + ".overflow"); + + // Construct the struct result. 
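  // For instance (a sketch with an assumed constant of 12, as Clang might emit
  // for "new T[n]" where sizeof(T) == 12):
  //   %r = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %n, i32 12)
  // becomes roughly:
  //   %r.arith = mul i32 %n, 12
  //   %r.overflow = icmp ugt i32 %n, 357913941   ; 357913941 == UINT_MAX / 12
  //   ...two insertvalue instructions then rebuild the { i32, i1 } result...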
+ Value *NewStruct = UndefValue::get(Call->getType()); + NewStruct = CreateInsertValue(NewStruct, 0, ArithResult, Call); + NewStruct = CreateInsertValue(NewStruct, 1, OverflowResult, Call); + Call->replaceAllUsesWith(NewStruct); + Call->eraseFromParent(); + } + Intrinsic->eraseFromParent(); + return true; +} + +static bool ExpandForIntSize(Module *M, unsigned Bits) { + bool Modified = false; + Modified |= ExpandOpForIntSize(M, Bits, true); // Expand umul + Modified |= ExpandOpForIntSize(M, Bits, false); // Expand uadd + return Modified; +} + +bool ExpandArithWithOverflow::runOnModule(Module &M) { + bool Modified = false; + Modified |= ExpandForIntSize(&M, 64); + Modified |= ExpandForIntSize(&M, 32); + Modified |= ExpandForIntSize(&M, 16); + Modified |= ExpandForIntSize(&M, 8); + return Modified; +} + +ModulePass *llvm::createExpandArithWithOverflowPass() { + return new ExpandArithWithOverflow(); +} diff --git a/lib/Transforms/NaCl/ExpandByVal.cpp b/lib/Transforms/NaCl/ExpandByVal.cpp new file mode 100644 index 0000000000..7227f3e501 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandByVal.cpp @@ -0,0 +1,199 @@ +//===- ExpandByVal.cpp - Expand out use of "byval" and "sret" attributes---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out by-value passing of structs as arguments and +// return values. In LLVM IR terms, it expands out the "byval" and +// "sret" function argument attributes. +// +// The semantics of the "byval" attribute are that the callee function +// gets a private copy of the pointed-to argument that it is allowed +// to modify. In implementing this, we have a choice between making +// the caller responsible for making the copy or making the callee +// responsible for making the copy. We choose the former, because +// this matches how the normal native calling conventions work, and +// because it often allows the caller to write struct contents +// directly into the stack slot that it passes the callee, without an +// additional copy. +// +// Note that this pass does not attempt to modify functions that pass +// structs by value without using "byval" or "sret", such as: +// +// define %struct.X @func() ; struct return +// define void @func(%struct.X %arg) ; struct arg +// +// The pass only handles functions such as: +// +// define void @func(%struct.X* sret %result_buffer) ; struct return +// define void @func(%struct.X* byval %ptr_to_arg) ; struct arg +// +// This is because PNaCl Clang generates the latter and not the former. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can strip attributes from + // declared functions as well as defined functions. 
+ class ExpandByVal : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandByVal() : ModulePass(ID) { + initializeExpandByValPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandByVal::ID = 0; +INITIALIZE_PASS(ExpandByVal, "expand-byval", + "Expand out by-value passing of structs", + false, false) + +// removeAttribute() currently does not work on Attribute::Alignment +// (it fails with an assertion error), so we have to take a more +// convoluted route to removing this attribute by recreating the +// AttributeSet. +AttributeSet RemoveAttrs(LLVMContext &Context, AttributeSet Attrs) { + SmallVector<AttributeSet, 8> AttrList; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + unsigned Index = Attrs.getSlotIndex(Slot); + AttrBuilder AB; + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (!Attr->isAlignAttribute() && + Attr->getKindAsEnum() != Attribute::ByVal && + Attr->getKindAsEnum() != Attribute::StructRet) { + AB.addAttribute(*Attr); + } + // IR semantics require that ByVal implies NoAlias. However, IR + // semantics do not require StructRet to imply NoAlias. For + // example, a global variable address can be passed as a + // StructRet argument, although Clang does not do so and Clang + // explicitly adds NoAlias to StructRet arguments. + if (Attr->getKindAsEnum() == Attribute::ByVal) { + AB.addAttribute(Attribute::get(Context, Attribute::NoAlias)); + } + } + AttrList.push_back(AttributeSet::get(Context, Index, AB)); + } + return AttributeSet::get(Context, AttrList); +} + +// ExpandCall() can take a CallInst or an InvokeInst. It returns +// whether the instruction was modified. +template <class InstType> +static bool ExpandCall(DataLayout *DL, InstType *Call) { + bool Modify = false; + AttributeSet Attrs = Call->getAttributes(); + for (unsigned ArgIdx = 0; ArgIdx < Call->getNumArgOperands(); ++ArgIdx) { + unsigned AttrIdx = ArgIdx + 1; + + if (Attrs.hasAttribute(AttrIdx, Attribute::StructRet)) + Modify = true; + + if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal)) { + Modify = true; + + Value *ArgPtr = Call->getArgOperand(ArgIdx); + Type *ArgType = ArgPtr->getType()->getPointerElementType(); + ConstantInt *ArgSize = ConstantInt::get( + Call->getContext(), APInt(64, DL->getTypeStoreSize(ArgType))); + unsigned Alignment = Attrs.getParamAlignment(AttrIdx); + // In principle, using the alignment from the argument attribute + // should be enough. However, Clang is not emitting this + // attribute for PNaCl. LLVM alloca instructions do not use the + // ABI alignment of the type, so this must be specified + // explicitly. + // See https://code.google.com/p/nativeclient/issues/detail?id=3403 + unsigned AllocAlignment = + std::max(Alignment, DL->getABITypeAlignment(ArgType)); + + // Make a copy of the byval argument. + Instruction *CopyBuf = new AllocaInst(ArgType, 0, AllocAlignment, + ArgPtr->getName() + ".byval_copy"); + Function *Func = Call->getParent()->getParent(); + Func->getEntryBlock().getInstList().push_front(CopyBuf); + IRBuilder<> Builder(Call); + Builder.CreateLifetimeStart(CopyBuf, ArgSize); + // Using the argument's alignment attribute for the memcpy + // should be OK because the LLVM Language Reference says that + // the alignment attribute specifies "the alignment of the stack + // slot to form and the known alignment of the pointer specified + // to the call site". 
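      // A rough sketch of the caller-side expansion (names, types and the
      // <size>/<copy>/<src> placeholders are illustrative): a call such as
      //   call void @f(%struct.X* byval align 4 %p)
      // ends up as an alloca in the caller's entry block plus an explicit copy:
      //   %p.byval_copy = alloca %struct.X, align 4
      //   call void @llvm.lifetime.start(i64 <size>, i8* <copy>)
      //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* <copy>, i8* <src>, i64 <size>, i32 4, i1 false)
      //   call void @f(%struct.X* %p.byval_copy)   ; "byval" attribute dropped
      //   call void @llvm.lifetime.end(i64 <size>, i8* <copy>)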
+ Instruction *MemCpy = Builder.CreateMemCpy(CopyBuf, ArgPtr, ArgSize, + Alignment); + MemCpy->setDebugLoc(Call->getDebugLoc()); + + Call->setArgOperand(ArgIdx, CopyBuf); + + // Mark the argument copy as unused using llvm.lifetime.end. + if (isa<CallInst>(Call)) { + BasicBlock::iterator It = BasicBlock::iterator(Call); + Builder.SetInsertPoint(++It); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Call)) { + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + Builder.SetInsertPoint(Invoke->getUnwindDest()->getFirstInsertionPt()); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + } + } + } + if (Modify) { + Call->setAttributes(RemoveAttrs(Call->getContext(), Attrs)); + + if (CallInst *CI = dyn_cast<CallInst>(Call)) { + // This is no longer a tail call because the callee references + // memory alloca'd by the caller. + CI->setTailCall(false); + } + } + return Modify; +} + +bool ExpandByVal::runOnModule(Module &M) { + bool Modified = false; + DataLayout DL(&M); + + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + AttributeSet NewAttrs = RemoveAttrs(Func->getContext(), + Func->getAttributes()); + Modified |= (NewAttrs != Func->getAttributes()); + Func->setAttributes(NewAttrs); + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + if (CallInst *Call = dyn_cast<CallInst>(Inst)) { + Modified |= ExpandCall(&DL, Call); + } else if (InvokeInst *Call = dyn_cast<InvokeInst>(Inst)) { + Modified |= ExpandCall(&DL, Call); + } + } + } + } + + return Modified; +} + +ModulePass *llvm::createExpandByValPass() { + return new ExpandByVal(); +} diff --git a/lib/Transforms/NaCl/ExpandConstantExpr.cpp b/lib/Transforms/NaCl/ExpandConstantExpr.cpp new file mode 100644 index 0000000000..2856a9d7e4 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandConstantExpr.cpp @@ -0,0 +1,93 @@ +//===- ExpandConstantExpr.cpp - Convert ConstantExprs to Instructions------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out ConstantExprs into Instructions. +// +// Note that this only converts ConstantExprs that are referenced by +// Instructions. It does not convert ConstantExprs that are used as +// initializers for global variables. +// +// This simplifies the language so that the PNaCl translator does not +// need to handle ConstantExprs as part of a stable wire format for +// PNaCl. +// +//===----------------------------------------------------------------------===// + +#include <map> + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +static bool expandInstruction(Instruction *Inst); + +namespace { + // This is a FunctionPass because our handling of PHI nodes means + // that our modifications may cross BasicBlocks. 
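  // As an illustrative example (not from a real module), an instruction that
  // embeds a ConstantExpr operand:
  //   %val = load i32* getelementptr inbounds ([10 x i32]* @arr, i32 0, i32 3)
  // is rewritten so the ConstantExpr becomes a separate instruction:
  //   %expanded = getelementptr inbounds [10 x i32]* @arr, i32 0, i32 3
  //   %val = load i32* %expanded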
+ struct ExpandConstantExpr : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandConstantExpr() : FunctionPass(ID) { + initializeExpandConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &Func); + }; +} + +char ExpandConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandConstantExpr, "expand-constant-expr", + "Expand out ConstantExprs into Instructions", + false, false) + +static Value *expandConstantExpr(Instruction *InsertPt, ConstantExpr *Expr) { + Instruction *NewInst = Expr->getAsInstruction(); + NewInst->insertBefore(InsertPt); + NewInst->setName("expanded"); + expandInstruction(NewInst); + return NewInst; +} + +static bool expandInstruction(Instruction *Inst) { + // A landingpad can only accept ConstantExprs, so it should remain + // unmodified. + if (isa<LandingPadInst>(Inst)) + return false; + + bool Modified = false; + for (unsigned OpNum = 0; OpNum < Inst->getNumOperands(); OpNum++) { + if (ConstantExpr *Expr = + dyn_cast<ConstantExpr>(Inst->getOperand(OpNum))) { + Modified = true; + Use *U = &Inst->getOperandUse(OpNum); + PhiSafeReplaceUses(U, expandConstantExpr(PhiSafeInsertPt(U), Expr)); + } + } + return Modified; +} + +bool ExpandConstantExpr::runOnFunction(Function &Func) { + bool Modified = false; + for (llvm::Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; + ++BB) { + for (BasicBlock::InstListType::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; + ++Inst) { + Modified |= expandInstruction(Inst); + } + } + return Modified; +} + +FunctionPass *llvm::createExpandConstantExprPass() { + return new ExpandConstantExpr(); +} diff --git a/lib/Transforms/NaCl/ExpandCtors.cpp b/lib/Transforms/NaCl/ExpandCtors.cpp new file mode 100644 index 0000000000..fd38e2f0f1 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandCtors.cpp @@ -0,0 +1,161 @@ +//===- ExpandCtors.cpp - Convert ctors/dtors to concrete arrays -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts LLVM's special symbols llvm.global_ctors and +// llvm.global_dtors to concrete arrays, __init_array_start/end and +// __fini_array_start/end, that are usable by a C library. +// +// This pass sorts the contents of global_ctors/dtors according to the +// priority values they contain and removes the priority values. 
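// A hedged sketch of the conversion (@init_foo is an illustrative name): an
// input such as
//   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
//       [{ i32, void ()* } { i32 65535, void ()* @init_foo }]
// is replaced by a plain, priority-sorted array of function pointers whose
// bounds are bound to the C library's symbols:
//   @__init_array_start = internal constant [1 x void ()*] [void ()* @init_foo]
// with __init_array_end resolving to the end of that array.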
+// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct ExpandCtors : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ExpandCtors() : ModulePass(ID) { + initializeExpandCtorsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandCtors::ID = 0; +INITIALIZE_PASS(ExpandCtors, "nacl-expand-ctors", + "Hook up constructor and destructor arrays to libc", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + GlobalVariable *Var = M.getNamedGlobal(Name); + if (!Var) { + // This warning can happen in a program that does not use a libc + // and so does not call the functions in __init_array_start or + // __fini_array_end. Such a program might be linked with + // "-nostdlib". + errs() << "Warning: Variable " << Name << " not referenced\n"; + } else { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +struct FuncArrayEntry { + uint64_t priority; + Constant *func; +}; + +static bool compareEntries(FuncArrayEntry Entry1, FuncArrayEntry Entry2) { + return Entry1.priority < Entry2.priority; +} + +static void readFuncList(GlobalVariable *Array, std::vector<Constant*> *Funcs) { + if (!Array->hasInitializer()) + return; + Constant *Init = Array->getInitializer(); + ArrayType *Ty = dyn_cast<ArrayType>(Init->getType()); + if (!Ty) { + errs() << "Initializer: " << *Array->getInitializer() << "\n"; + report_fatal_error("ExpandCtors: Initializer is not of array type"); + } + if (Ty->getNumElements() == 0) + return; + ConstantArray *InitList = dyn_cast<ConstantArray>(Init); + if (!InitList) { + errs() << "Initializer: " << *Array->getInitializer() << "\n"; + report_fatal_error("ExpandCtors: Unexpected initializer ConstantExpr"); + } + std::vector<FuncArrayEntry> FuncsToSort; + for (unsigned Index = 0; Index < InitList->getNumOperands(); ++Index) { + ConstantStruct *CS = cast<ConstantStruct>(InitList->getOperand(Index)); + FuncArrayEntry Entry; + Entry.priority = cast<ConstantInt>(CS->getOperand(0))->getZExtValue(); + Entry.func = CS->getOperand(1); + FuncsToSort.push_back(Entry); + } + + std::sort(FuncsToSort.begin(), FuncsToSort.end(), compareEntries); + for (std::vector<FuncArrayEntry>::iterator Iter = FuncsToSort.begin(); + Iter != FuncsToSort.end(); + ++Iter) { + Funcs->push_back(Iter->func); + } +} + +static void defineFuncArray(Module &M, const char *LlvmArrayName, + const char *StartSymbol, + const char *EndSymbol) { + std::vector<Constant*> Funcs; + + GlobalVariable *Array = M.getNamedGlobal(LlvmArrayName); + if (Array) { + readFuncList(Array, &Funcs); + // No code should be referencing global_ctors/global_dtors, + // because this symbol is internal to LLVM. 
+ Array->eraseFromParent(); + } + + Type *FuncTy = FunctionType::get(Type::getVoidTy(M.getContext()), false); + Type *FuncPtrTy = FuncTy->getPointerTo(); + ArrayType *ArrayTy = ArrayType::get(FuncPtrTy, Funcs.size()); + GlobalVariable *NewArray = + new GlobalVariable(M, ArrayTy, /* isConstant= */ true, + GlobalValue::InternalLinkage, + ConstantArray::get(ArrayTy, Funcs)); + setGlobalVariableValue(M, StartSymbol, NewArray); + // We do this last so that LLVM gives NewArray the name + // "__{init,fini}_array_start" without adding any suffixes to + // disambiguate from the original GlobalVariable's name. This is + // not essential -- it just makes the output easier to understand + // when looking at symbols for debugging. + NewArray->setName(StartSymbol); + + // We replace "__{init,fini}_array_end" with the address of the end + // of NewArray. This removes the name "__{init,fini}_array_end" + // from the output, which is not ideal for debugging. Ideally we + // would convert "__{init,fini}_array_end" to being a GlobalAlias + // that points to the end of the array. However, unfortunately LLVM + // does not generate correct code when a GlobalAlias contains a + // GetElementPtr ConstantExpr. + Constant *NewArrayEnd = + ConstantExpr::getGetElementPtr(NewArray, + ConstantInt::get(M.getContext(), + APInt(32, 1))); + setGlobalVariableValue(M, EndSymbol, NewArrayEnd); +} + +bool ExpandCtors::runOnModule(Module &M) { + defineFuncArray(M, "llvm.global_ctors", + "__init_array_start", "__init_array_end"); + defineFuncArray(M, "llvm.global_dtors", + "__fini_array_start", "__fini_array_end"); + return true; +} + +ModulePass *llvm::createExpandCtorsPass() { + return new ExpandCtors(); +} diff --git a/lib/Transforms/NaCl/ExpandGetElementPtr.cpp b/lib/Transforms/NaCl/ExpandGetElementPtr.cpp new file mode 100644 index 0000000000..1fe11293ca --- /dev/null +++ b/lib/Transforms/NaCl/ExpandGetElementPtr.cpp @@ -0,0 +1,150 @@ +//===- ExpandGetElementPtr.cpp - Expand GetElementPtr into arithmetic------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out GetElementPtr instructions into ptrtoint, +// inttoptr and arithmetic instructions. +// +// This simplifies the language so that the PNaCl translator does not +// need to handle GetElementPtr and struct types as part of a stable +// wire format for PNaCl. +// +// Note that we drop the "inbounds" attribute of GetElementPtr. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandGetElementPtr : public BasicBlockPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandGetElementPtr() : BasicBlockPass(ID) { + initializeExpandGetElementPtrPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnBasicBlock(BasicBlock &BB); + }; +} + +char ExpandGetElementPtr::ID = 0; +INITIALIZE_PASS(ExpandGetElementPtr, "expand-getelementptr", + "Expand out GetElementPtr instructions into arithmetic", + false, false) + +static Value *CastToPtrSize(Value *Val, Instruction *InsertPt, + const DebugLoc &Debug, Type *PtrType) { + unsigned ValSize = Val->getType()->getIntegerBitWidth(); + unsigned PtrSize = PtrType->getIntegerBitWidth(); + if (ValSize == PtrSize) + return Val; + Instruction *Inst; + if (ValSize > PtrSize) { + Inst = new TruncInst(Val, PtrType, "gep_trunc", InsertPt); + } else { + // GEP indexes must be sign-extended. + Inst = new SExtInst(Val, PtrType, "gep_sext", InsertPt); + } + Inst->setDebugLoc(Debug); + return Inst; +} + +static void FlushOffset(Instruction **Ptr, uint64_t *CurrentOffset, + Instruction *InsertPt, const DebugLoc &Debug, + Type *PtrType) { + if (*CurrentOffset) { + *Ptr = BinaryOperator::Create(Instruction::Add, *Ptr, + ConstantInt::get(PtrType, *CurrentOffset), + "gep", InsertPt); + (*Ptr)->setDebugLoc(Debug); + *CurrentOffset = 0; + } +} + +static void ExpandGEP(GetElementPtrInst *GEP, DataLayout *DL, Type *PtrType) { + const DebugLoc &Debug = GEP->getDebugLoc(); + Instruction *Ptr = new PtrToIntInst(GEP->getPointerOperand(), PtrType, + "gep_int", GEP); + Ptr->setDebugLoc(Debug); + + Type *CurrentTy = GEP->getPointerOperand()->getType(); + // We do some limited constant folding ourselves. An alternative + // would be to generate verbose, unfolded output (e.g. multiple + // adds; adds of zero constants) and use a later pass such as + // "-instcombine" to clean that up. However, "-instcombine" can + // reintroduce GetElementPtr instructions. 
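  // As a sketch of the output this produces for a simple struct access
  // (assuming 32-bit pointers, as in PNaCl, and an illustrative type
  // %struct.S = { i32, i32 }):
  //   %f = getelementptr inbounds %struct.S* %p, i32 0, i32 1
  // becomes:
  //   %gep_int = ptrtoint %struct.S* %p to i32
  //   %gep = add i32 %gep_int, 4      ; constant-folded offset of field 1
  //   %f = inttoptr i32 %gep to i32*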
+ uint64_t CurrentOffset = 0; + + for (GetElementPtrInst::op_iterator Op = GEP->op_begin() + 1; + Op != GEP->op_end(); + ++Op) { + Value *Index = *Op; + if (StructType *StTy = dyn_cast<StructType>(CurrentTy)) { + uint64_t Field = cast<ConstantInt>(Op)->getZExtValue(); + CurrentTy = StTy->getElementType(Field); + CurrentOffset += DL->getStructLayout(StTy)->getElementOffset(Field); + } else { + CurrentTy = cast<SequentialType>(CurrentTy)->getElementType(); + uint64_t ElementSize = DL->getTypeAllocSize(CurrentTy); + if (ConstantInt *C = dyn_cast<ConstantInt>(Index)) { + CurrentOffset += C->getSExtValue() * ElementSize; + } else { + FlushOffset(&Ptr, &CurrentOffset, GEP, Debug, PtrType); + Index = CastToPtrSize(Index, GEP, Debug, PtrType); + if (ElementSize != 1) { + Index = CopyDebug( + BinaryOperator::Create(Instruction::Mul, Index, + ConstantInt::get(PtrType, ElementSize), + "gep_array", GEP), + GEP); + } + Ptr = BinaryOperator::Create(Instruction::Add, Ptr, + Index, "gep", GEP); + Ptr->setDebugLoc(Debug); + } + } + } + FlushOffset(&Ptr, &CurrentOffset, GEP, Debug, PtrType); + + assert(CurrentTy == GEP->getType()->getElementType()); + Instruction *Result = new IntToPtrInst(Ptr, GEP->getType(), "", GEP); + Result->setDebugLoc(Debug); + Result->takeName(GEP); + GEP->replaceAllUsesWith(Result); + GEP->eraseFromParent(); +} + +bool ExpandGetElementPtr::runOnBasicBlock(BasicBlock &BB) { + bool Modified = false; + DataLayout DL(BB.getParent()->getParent()); + Type *PtrType = DL.getIntPtrType(BB.getContext()); + + for (BasicBlock::InstListType::iterator Iter = BB.begin(); + Iter != BB.end(); ) { + Instruction *Inst = Iter++; + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) { + Modified = true; + ExpandGEP(GEP, &DL, PtrType); + } + } + return Modified; +} + +BasicBlockPass *llvm::createExpandGetElementPtrPass() { + return new ExpandGetElementPtr(); +} diff --git a/lib/Transforms/NaCl/ExpandSmallArguments.cpp b/lib/Transforms/NaCl/ExpandSmallArguments.cpp new file mode 100644 index 0000000000..c8a321edb9 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandSmallArguments.cpp @@ -0,0 +1,217 @@ +//===- ExpandSmallArguments.cpp - Expand out arguments smaller than i32----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLVM IR allows function return types and argument types such as +// "zeroext i8" and "signext i8". The Language Reference says that +// zeroext "indicates to the code generator that the parameter or +// return value should be zero-extended to the extent required by the +// target's ABI (which is usually 32-bits, but is 8-bits for a i1 on +// x86-64) by the caller (for a parameter) or the callee (for a return +// value)". +// +// This can lead to non-portable behaviour when calling functions +// without C prototypes or with wrong C prototypes. +// +// In order to remove this non-portability from PNaCl, and to simplify +// the language that the PNaCl translator accepts, the +// ExpandSmallArguments pass widens integer arguments and return types +// to be at least 32 bits. The pass inserts explicit cast +// instructions (ZExtInst/SExtInst/TruncInst) as needed. +// +// The pass chooses between ZExtInst and SExtInst widening based on +// whether a "signext" attribute is present. 
However, in principle +// the pass could always use zero-extension, because the extent to +// which either zero-extension or sign-extension is done is up to the +// target ABI, which is up to PNaCl to specify. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass recreates functions in + // order to change their arguments' types. + class ExpandSmallArguments : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandSmallArguments() : ModulePass(ID) { + initializeExpandSmallArgumentsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandSmallArguments::ID = 0; +INITIALIZE_PASS(ExpandSmallArguments, "expand-small-arguments", + "Expand function arguments to be at least 32 bits in size", + false, false) + +// Returns the normalized version of the given argument/return type. +static Type *NormalizeType(Type *Ty) { + if (IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) { + if (IntTy->getBitWidth() < 32) { + return IntegerType::get(Ty->getContext(), 32); + } + } + return Ty; +} + +// Returns the normalized version of the given function type. +static FunctionType *NormalizeFunctionType(FunctionType *FTy) { + if (FTy->isVarArg()) { + report_fatal_error( + "ExpandSmallArguments does not handle varargs functions"); + } + SmallVector<Type *, 8> ArgTypes; + for (unsigned I = 0; I < FTy->getNumParams(); ++I) { + ArgTypes.push_back(NormalizeType(FTy->getParamType(I))); + } + return FunctionType::get(NormalizeType(FTy->getReturnType()), + ArgTypes, false); +} + +// Convert the given function to use normalized argument/return types. +static bool ConvertFunction(Function *Func) { + FunctionType *FTy = Func->getFunctionType(); + FunctionType *NFTy = NormalizeFunctionType(FTy); + if (NFTy == FTy) + return false; // No change needed. + Function *NewFunc = RecreateFunction(Func, NFTy); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = Func->arg_begin(), E = Func->arg_end(), + NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + NewArg->takeName(Arg); + if (Arg->getType() == NewArg->getType()) { + Arg->replaceAllUsesWith(NewArg); + } else { + Instruction *Trunc = new TruncInst( + NewArg, Arg->getType(), NewArg->getName() + ".arg_trunc", + NewFunc->getEntryBlock().getFirstInsertionPt()); + Arg->replaceAllUsesWith(Trunc); + } + } + + if (FTy->getReturnType() != NFTy->getReturnType()) { + // Fix up return instructions. + Instruction::CastOps CastType = + Func->getAttributes().hasAttribute(0, Attribute::SExt) ? 
+ Instruction::SExt : Instruction::ZExt; + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (ReturnInst *Ret = dyn_cast<ReturnInst>(Inst)) { + Value *Ext = CopyDebug( + CastInst::Create(CastType, Ret->getReturnValue(), + NFTy->getReturnType(), + Ret->getReturnValue()->getName() + ".ret_ext", + Ret), + Ret); + CopyDebug(ReturnInst::Create(Ret->getContext(), Ext, Ret), Ret); + Ret->eraseFromParent(); + } + } + } + } + + Func->eraseFromParent(); + return true; +} + +// Convert the given call to use normalized argument/return types. +static bool ConvertCall(CallInst *Call) { + // Don't try to change calls to intrinsics. + if (isa<IntrinsicInst>(Call)) + return false; + FunctionType *FTy = cast<FunctionType>( + Call->getCalledValue()->getType()->getPointerElementType()); + FunctionType *NFTy = NormalizeFunctionType(FTy); + if (NFTy == FTy) + return false; // No change needed. + + // Convert arguments. + SmallVector<Value *, 8> Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) { + Value *Arg = Call->getArgOperand(I); + if (NFTy->getParamType(I) != FTy->getParamType(I)) { + Instruction::CastOps CastType = + Call->getAttributes().hasAttribute(I + 1, Attribute::SExt) ? + Instruction::SExt : Instruction::ZExt; + Arg = CopyDebug(CastInst::Create(CastType, Arg, NFTy->getParamType(I), + "arg_ext", Call), Call); + } + Args.push_back(Arg); + } + Value *CastFunc = + CopyDebug(new BitCastInst(Call->getCalledValue(), NFTy->getPointerTo(), + Call->getName() + ".arg_cast", Call), Call); + CallInst *NewCall = CallInst::Create(CastFunc, Args, "", Call); + CopyDebug(NewCall, Call); + NewCall->takeName(Call); + NewCall->setAttributes(Call->getAttributes()); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->setTailCall(Call->isTailCall()); + Value *Result = NewCall; + if (FTy->getReturnType() != NFTy->getReturnType()) { + Result = CopyDebug(new TruncInst(NewCall, FTy->getReturnType(), + NewCall->getName() + ".ret_trunc", + Call), Call); + } + Call->replaceAllUsesWith(Result); + Call->eraseFromParent(); + return true; +} + +bool ExpandSmallArguments::runOnModule(Module &M) { + bool Changed = false; + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = Iter++; + // Don't try to change intrinsic declarations because intrinsics + // will continue to have non-normalized argument types. For + // example, memset() takes an i8 argument. It shouldn't matter + // whether we modify the types of other function declarations, but + // we don't expect to see non-intrinsic function declarations in a + // PNaCl pexe. 
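  // A hedged sketch of the overall effect on a function (names illustrative):
  //   define signext i8 @f(i8 signext %c)
  // is recreated with widened argument and return types, with casts inserted
  // at the boundaries:
  //   define i32 @f(i32 %c) {
  //     %c.arg_trunc = trunc i32 %c to i8
  //     ...body uses %c.arg_trunc...
  //     %r.ret_ext = sext i8 %r to i32
  //     ret i32 %r.ret_ext
  //   }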
+ if (Func->empty()) + continue; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (CallInst *Call = dyn_cast<CallInst>(Inst)) { + Changed |= ConvertCall(Call); + } else if (isa<InvokeInst>(Inst)) { + report_fatal_error( + "ExpandSmallArguments does not handle invoke instructions"); + } + } + } + + Changed |= ConvertFunction(Func); + } + return Changed; +} + +ModulePass *llvm::createExpandSmallArgumentsPass() { + return new ExpandSmallArguments(); +} diff --git a/lib/Transforms/NaCl/ExpandStructRegs.cpp b/lib/Transforms/NaCl/ExpandStructRegs.cpp new file mode 100644 index 0000000000..5c11a76c8b --- /dev/null +++ b/lib/Transforms/NaCl/ExpandStructRegs.cpp @@ -0,0 +1,295 @@ +//===- ExpandStructRegs.cpp - Expand out variables with struct type--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out some uses of LLVM variables +// (a.k.a. registers) of struct type. It replaces loads and stores of +// structs with separate loads and stores of the structs' fields. The +// motivation is to omit struct types from PNaCl's stable ABI. +// +// ExpandStructRegs does not yet handle all possible uses of struct +// values. It is intended to handle the uses that Clang and the SROA +// pass generate. Clang generates struct loads and stores, along with +// extractvalue instructions, in its implementation of C++ method +// pointers, and the SROA pass sometimes converts this code to using +// insertvalue instructions too. +// +// ExpandStructRegs does not handle: +// +// * Nested struct types. +// * Array types. +// * Function types containing arguments or return values of struct +// type without the "byval" or "sret" attributes. Since by-value +// struct-passing generally uses "byval"/"sret", this does not +// matter. +// +// Other limitations: +// +// * ExpandStructRegs does not attempt to use memcpy() where that +// might be more appropriate than copying fields individually. +// * ExpandStructRegs does not preserve the contents of padding +// between fields when copying structs. However, the contents of +// padding fields are not defined anyway. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct ExpandStructRegs : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandStructRegs() : FunctionPass(ID) { + initializeExpandStructRegsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + }; +} + +char ExpandStructRegs::ID = 0; +INITIALIZE_PASS(ExpandStructRegs, "expand-struct-regs", + "Expand out variables with struct types", false, false) + +static void SplitUpPHINode(PHINode *Phi) { + StructType *STy = cast<StructType>(Phi->getType()); + + Value *NewStruct = UndefValue::get(STy); + Instruction *NewStructInsertPt = Phi->getParent()->getFirstInsertionPt(); + + // Create a separate PHINode for each struct field. 
+ for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector<unsigned, 1> EVIndexes; + EVIndexes.push_back(Index); + + PHINode *NewPhi = PHINode::Create( + STy->getElementType(Index), Phi->getNumIncomingValues(), + Phi->getName() + ".index", Phi); + CopyDebug(NewPhi, Phi); + for (unsigned PhiIndex = 0; PhiIndex < Phi->getNumIncomingValues(); + ++PhiIndex) { + BasicBlock *IncomingBB = Phi->getIncomingBlock(PhiIndex); + Value *EV = CopyDebug( + ExtractValueInst::Create( + Phi->getIncomingValue(PhiIndex), EVIndexes, + Phi->getName() + ".extract", IncomingBB->getTerminator()), Phi); + NewPhi->addIncoming(EV, IncomingBB); + } + + // Reconstruct the original struct value. + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewPhi, EVIndexes, + Phi->getName() + ".insert", NewStructInsertPt), + Phi); + } + Phi->replaceAllUsesWith(NewStruct); + Phi->eraseFromParent(); +} + +static void SplitUpSelect(SelectInst *Select) { + StructType *STy = cast<StructType>(Select->getType()); + Value *NewStruct = UndefValue::get(STy); + + // Create a separate SelectInst for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector<unsigned, 1> EVIndexes; + EVIndexes.push_back(Index); + + Value *TrueVal = CopyDebug( + ExtractValueInst::Create(Select->getTrueValue(), EVIndexes, + Select->getName() + ".extract", Select), + Select); + Value *FalseVal = CopyDebug( + ExtractValueInst::Create(Select->getFalseValue(), EVIndexes, + Select->getName() + ".extract", Select), + Select); + Value *NewSelect = CopyDebug( + SelectInst::Create(Select->getCondition(), TrueVal, FalseVal, + Select->getName() + ".index", Select), Select); + + // Reconstruct the original struct value. + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewSelect, EVIndexes, + Select->getName() + ".insert", Select), + Select); + } + Select->replaceAllUsesWith(NewStruct); + Select->eraseFromParent(); +} + +template <class InstType> +static void ProcessLoadOrStoreAttrs(InstType *Dest, InstType *Src) { + CopyDebug(Dest, Src); + Dest->setVolatile(Src->isVolatile()); + if (Src->isAtomic()) { + errs() << "Use: " << *Src << "\n"; + report_fatal_error("Atomic struct loads/stores not supported"); + } + // Make a pessimistic assumption about alignment. Preserving + // alignment information here is tricky and is not really desirable + // for PNaCl because mistakes here could lead to non-portable + // behaviour. + Dest->setAlignment(1); +} + +static void SplitUpStore(StoreInst *Store) { + StructType *STy = cast<StructType>(Store->getValueOperand()->getType()); + // Create a separate store instruction for each struct field. 
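  // Roughly (with an illustrative %struct.S = { i32, i32 }):
  //   store %struct.S %val, %struct.S* %ptr
  // becomes one store per field, each through its own getelementptr:
  //   %ptr.index = getelementptr %struct.S* %ptr, i32 0, i32 0
  //   %field0 = extractvalue %struct.S %val, 0
  //   store i32 %field0, i32* %ptr.index, align 1
  //   ...and likewise for field 1.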
+ for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector<Value *, 2> Indexes; + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, Index))); + Value *GEP = CopyDebug(GetElementPtrInst::Create( + Store->getPointerOperand(), Indexes, + Store->getPointerOperand()->getName() + ".index", + Store), Store); + SmallVector<unsigned, 1> EVIndexes; + EVIndexes.push_back(Index); + Value *Field = ExtractValueInst::Create(Store->getValueOperand(), + EVIndexes, "", Store); + StoreInst *NewStore = new StoreInst(Field, GEP, Store); + ProcessLoadOrStoreAttrs(NewStore, Store); + } + Store->eraseFromParent(); +} + +static void SplitUpLoad(LoadInst *Load) { + StructType *STy = cast<StructType>(Load->getType()); + Value *NewStruct = UndefValue::get(STy); + + // Create a separate load instruction for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector<Value *, 2> Indexes; + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, Index))); + Value *GEP = CopyDebug( + GetElementPtrInst::Create(Load->getPointerOperand(), Indexes, + Load->getName() + ".index", Load), Load); + LoadInst *NewLoad = new LoadInst(GEP, Load->getName() + ".field", Load); + ProcessLoadOrStoreAttrs(NewLoad, Load); + + // Reconstruct the struct value. + SmallVector<unsigned, 1> EVIndexes; + EVIndexes.push_back(Index); + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewLoad, EVIndexes, + Load->getName() + ".insert", Load), Load); + } + Load->replaceAllUsesWith(NewStruct); + Load->eraseFromParent(); +} + +static void ExpandExtractValue(ExtractValueInst *EV) { + // Search for the insertvalue instruction that inserts the struct + // field referenced by this extractvalue instruction. + Value *StructVal = EV->getAggregateOperand(); + Value *ResultField; + for (;;) { + if (InsertValueInst *IV = dyn_cast<InsertValueInst>(StructVal)) { + if (EV->getNumIndices() != 1 || IV->getNumIndices() != 1) { + errs() << "Value: " << *EV << "\n"; + errs() << "Value: " << *IV << "\n"; + report_fatal_error("ExpandStructRegs does not handle nested structs"); + } + if (EV->getIndices()[0] == IV->getIndices()[0]) { + ResultField = IV->getInsertedValueOperand(); + break; + } + // No match. Try the next struct value in the chain. + StructVal = IV->getAggregateOperand(); + } else if (Constant *C = dyn_cast<Constant>(StructVal)) { + ResultField = ConstantExpr::getExtractValue(C, EV->getIndices()); + break; + } else { + errs() << "Value: " << *StructVal << "\n"; + report_fatal_error("Unrecognized struct value"); + } + } + EV->replaceAllUsesWith(ResultField); + EV->eraseFromParent(); +} + +bool ExpandStructRegs::runOnFunction(Function &Func) { + bool Changed = false; + + // Split up aggregate loads, stores and phi nodes into operations on + // scalar types. This inserts extractvalue and insertvalue + // instructions which we will expand out later. 
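  // As a small illustration of the later extractvalue folding: in a chain like
  //   %s1 = insertvalue %struct.S undef, i32 %a, 0
  //   %s2 = insertvalue %struct.S %s1, i32 %b, 1
  //   %x  = extractvalue %struct.S %s2, 0
  // %x can simply be replaced with %a, leaving the insertvalues dead.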
+ for (Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { + if (Store->getValueOperand()->getType()->isStructTy()) { + SplitUpStore(Store); + Changed = true; + } + } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + if (Load->getType()->isStructTy()) { + SplitUpLoad(Load); + Changed = true; + } + } else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) { + if (Phi->getType()->isStructTy()) { + SplitUpPHINode(Phi); + Changed = true; + } + } else if (SelectInst *Select = dyn_cast<SelectInst>(Inst)) { + if (Select->getType()->isStructTy()) { + SplitUpSelect(Select); + Changed = true; + } + } + } + } + + // Expand out all the extractvalue instructions. Also collect up + // the insertvalue instructions for later deletion so that we do not + // need to make extra passes across the whole function. + SmallVector<Instruction *, 10> ToErase; + for (Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Inst)) { + ExpandExtractValue(EV); + Changed = true; + } else if (isa<InsertValueInst>(Inst)) { + ToErase.push_back(Inst); + Changed = true; + } + } + } + // Delete the insertvalue instructions. These can reference each + // other, so we must do dropAllReferences() before doing + // eraseFromParent(), otherwise we will try to erase instructions + // that are still referenced. + for (SmallVectorImpl<Instruction *>::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->dropAllReferences(); + } + for (SmallVectorImpl<Instruction *>::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + return Changed; +} + +FunctionPass *llvm::createExpandStructRegsPass() { + return new ExpandStructRegs(); +} diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp new file mode 100644 index 0000000000..19837f7448 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandTls.cpp @@ -0,0 +1,334 @@ +//===- ExpandTls.cpp - Convert TLS variables to a concrete layout----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out uses of thread-local (TLS) variables into +// more primitive operations. +// +// A reference to the address of a TLS variable is expanded into code +// which gets the current thread's thread pointer using +// @llvm.nacl.read.tp() and adds a fixed offset. +// +// This pass allocates the offsets (relative to the thread pointer) +// that will be used for TLS variables. It sets up the global +// variables __tls_template_start, __tls_template_end etc. to contain +// a template for initializing TLS variables' values for each thread. +// This is a task normally performed by the linker in ELF systems. 
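// As a rough sketch (the field index and names are illustrative), a TLS access:
//   @tls_var = thread_local global i32 0
//   %v = load i32* @tls_var
// is expanded into thread-pointer arithmetic over the generated template type:
//   %tls_raw = call i8* @llvm.nacl.read.tp()
//   %tls_struct = bitcast i8* %tls_raw to %tls_struct*
//   %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0
//   %v = load i32* %field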
+// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct VarInfo { + GlobalVariable *TlsVar; + bool IsBss; // Whether variable is in zero-initialized part of template + int TemplateIndex; + }; + + class PassState { + public: + PassState(Module *M): M(M), DL(M), Offset(0), Alignment(1) {} + + Module *M; + DataLayout DL; + uint64_t Offset; + // 'Alignment' is the maximum variable alignment seen so far, in + // bytes. After visiting all TLS variables, this is the overall + // alignment required for the TLS template. + uint32_t Alignment; + }; + + class ExpandTls : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTls() : ModulePass(ID) { + initializeExpandTlsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTls::ID = 0; +INITIALIZE_PASS(ExpandTls, "nacl-expand-tls", + "Expand out TLS variables and fix TLS variable layout", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + GlobalVariable *Var = M.getNamedGlobal(Name); + if (!Var) { + // This warning can happen in a program that does not use a libc + // and does not initialize TLS variables. Such a program might be + // linked with "-nostdlib". + errs() << "Warning: Variable " << Name << " not referenced\n"; + } else { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +// Insert alignment padding into the TLS template.
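// For example (a worked case, for illustration): if the current offset is 5 and
// the next variable needs 4-byte alignment, a [3 x i8] zero-filled pad field is
// appended and the offset becomes 8.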
+static void padToAlignment(PassState *State, + std::vector<Type*> *FieldTypes, + std::vector<Constant*> *FieldValues, + unsigned Alignment) { + if ((State->Offset & (Alignment - 1)) != 0) { + unsigned PadSize = Alignment - (State->Offset & (Alignment - 1)); + Type *i8 = Type::getInt8Ty(State->M->getContext()); + Type *PadType = ArrayType::get(i8, PadSize); + FieldTypes->push_back(PadType); + if (FieldValues) + FieldValues->push_back(Constant::getNullValue(PadType)); + State->Offset += PadSize; + } + if (State->Alignment < Alignment) { + State->Alignment = Alignment; + } +} + +static void addVarToTlsTemplate(PassState *State, + std::vector<Type*> *FieldTypes, + std::vector<Constant*> *FieldValues, + GlobalVariable *TlsVar) { + unsigned Alignment = State->DL.getPreferredAlignment(TlsVar); + padToAlignment(State, FieldTypes, FieldValues, Alignment); + + FieldTypes->push_back(TlsVar->getType()->getElementType()); + if (FieldValues) + FieldValues->push_back(TlsVar->getInitializer()); + State->Offset += + State->DL.getTypeAllocSize(TlsVar->getType()->getElementType()); +} + +static PointerType *buildTlsTemplate(Module &M, std::vector<VarInfo> *TlsVars) { + std::vector<Type*> FieldBssTypes; + std::vector<Type*> FieldInitTypes; + std::vector<Constant*> FieldInitValues; + PassState State(&M); + + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal()) { + if (!GV->hasInitializer()) { + // Since this is a whole-program transformation, "extern" TLS + // variables are not allowed at this point. + report_fatal_error(std::string("TLS variable without an initializer: ") + + GV->getName()); + } + if (!GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldInitTypes, + &FieldInitValues, GV); + VarInfo Info; + Info.TlsVar = GV; + Info.IsBss = false; + Info.TemplateIndex = FieldInitTypes.size() - 1; + TlsVars->push_back(Info); + } + } + } + // Handle zero-initialized TLS variables in a second pass, because + // these should follow non-zero-initialized TLS variables. + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal() && GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldBssTypes, NULL, GV); + VarInfo Info; + Info.TlsVar = GV; + Info.IsBss = true; + Info.TemplateIndex = FieldBssTypes.size() - 1; + TlsVars->push_back(Info); + } + } + // Add final alignment padding so that + // (struct tls_struct *) __nacl_read_tp() - 1 + // gives the correct, aligned start of the TLS variables given the + // x86-style layout we are using. This requires some more bytes to + // be memset() to zero at runtime. This wastage doesn't seem + // important gives that we're not trying to optimize packing by + // reordering to put similarly-aligned variables together. + padToAlignment(&State, &FieldBssTypes, NULL, State.Alignment); + + // We create the TLS template structs as "packed" because we insert + // alignment padding ourselves, and LLVM's implicit insertion of + // padding would interfere with ours. tls_bss_template can start at + // a non-aligned address immediately following the last field in + // tls_init_template. 
+ StructType *InitTemplateType = + StructType::create(M.getContext(), "tls_init_template"); + InitTemplateType->setBody(FieldInitTypes, /*isPacked=*/true); + StructType *BssTemplateType = + StructType::create(M.getContext(), "tls_bss_template"); + BssTemplateType->setBody(FieldBssTypes, /*isPacked=*/true); + + StructType *TemplateType = StructType::create(M.getContext(), "tls_struct"); + SmallVector<Type*, 2> TemplateTopFields; + TemplateTopFields.push_back(InitTemplateType); + TemplateTopFields.push_back(BssTemplateType); + TemplateType->setBody(TemplateTopFields, /*isPacked=*/true); + PointerType *TemplatePtrType = PointerType::get(TemplateType, 0); + + // We define the following symbols, which are the same as those + // defined by NaCl's original customized binutils linker scripts: + // __tls_template_start + // __tls_template_tdata_end + // __tls_template_end + // We also define __tls_template_alignment, which was not defined by + // the original linker scripts. + + const char *StartSymbol = "__tls_template_start"; + Constant *TemplateData = ConstantStruct::get(InitTemplateType, + FieldInitValues); + GlobalVariable *TemplateDataVar = + new GlobalVariable(M, InitTemplateType, /*isConstant=*/true, + GlobalValue::InternalLinkage, TemplateData); + setGlobalVariableValue(M, StartSymbol, TemplateDataVar); + TemplateDataVar->setName(StartSymbol); + + Constant *TdataEnd = ConstantExpr::getGetElementPtr( + TemplateDataVar, + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_tdata_end", TdataEnd); + + Constant *TotalEnd = ConstantExpr::getGetElementPtr( + ConstantExpr::getBitCast(TemplateDataVar, TemplatePtrType), + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_end", TotalEnd); + + const char *AlignmentSymbol = "__tls_template_alignment"; + Type *i32 = Type::getInt32Ty(M.getContext()); + GlobalVariable *AlignmentVar = new GlobalVariable( + M, i32, /*isConstant=*/true, + GlobalValue::InternalLinkage, + ConstantInt::get(M.getContext(), APInt(32, State.Alignment))); + setGlobalVariableValue(M, AlignmentSymbol, AlignmentVar); + AlignmentVar->setName(AlignmentSymbol); + + return TemplatePtrType; +} + +static void rewriteTlsVars(Module &M, std::vector<VarInfo> *TlsVars, + PointerType *TemplatePtrType) { + // Set up the intrinsic that reads the thread pointer. + Function *ReadTpFunc = Intrinsic::getDeclaration(&M, Intrinsic::nacl_read_tp); + + for (std::vector<VarInfo>::iterator VarInfo = TlsVars->begin(); + VarInfo != TlsVars->end(); + ++VarInfo) { + GlobalVariable *Var = VarInfo->TlsVar; + while (!Var->use_empty()) { + Use *U = &Var->use_begin().getUse(); + Instruction *InsertPt = PhiSafeInsertPt(U); + Value *RawThreadPtr = CallInst::Create(ReadTpFunc, "tls_raw", InsertPt); + Value *TypedThreadPtr = new BitCastInst(RawThreadPtr, TemplatePtrType, + "tls_struct", InsertPt); + SmallVector<Value*, 3> Indexes; + // We use -1 because we use the x86-style TLS layout in which + // the TLS data is stored at addresses below the thread pointer. + // This is largely because a check in nacl_irt_thread_create() + // in irt/irt_thread.c requires the thread pointer to be a + // self-pointer on x86-32. + // TODO(mseaborn): I intend to remove that check because it is + // non-portable. In the mean time, we want PNaCl pexes to work + // in older Chromium releases when translated to nexes. 
+ Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, -1))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->IsBss ? 1 : 0))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->TemplateIndex))); + Value *TlsField = GetElementPtrInst::Create(TypedThreadPtr, Indexes, + "field", InsertPt); + PhiSafeReplaceUses(U, TlsField); + } + VarInfo->TlsVar->eraseFromParent(); + } +} + +// Provide fixed definitions for PNaCl's TLS layout intrinsics. We +// adopt the x86-style layout: ExpandTls will output a program that +// uses the x86-style layout wherever it runs. This overrides any +// architecture-specific definitions of the intrinsics that the LLVM +// backend might provide. +static void defineTlsLayoutIntrinsics(Module &M) { + Type *i32 = Type::getInt32Ty(M.getContext()); + SmallVector<Type*, 1> ArgTypes; + ArgTypes.push_back(i32); + FunctionType *FuncType = FunctionType::get(i32, ArgTypes, /*isVarArg=*/false); + Function *NewFunc; + BasicBlock *BB; + + // Define the intrinsic as follows: + // uint32_t __nacl_tp_tdb_offset(uint32_t tdb_size) { + // return 0; + // } + // This means the thread pointer points to the TDB. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tdb_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + ReturnInst::Create(M.getContext(), + ConstantInt::get(M.getContext(), APInt(32, 0)), BB); + if (Function *Intrinsic = M.getFunction("llvm.nacl.tp.tdb.offset")) { + Intrinsic->replaceAllUsesWith(NewFunc); + Intrinsic->eraseFromParent(); + } + + // Define the intrinsic as follows: + // uint32_t __nacl_tp_tls_offset(uint32_t tls_size) { + // return -tls_size; + // } + // This means the TLS variables are stored below the thread pointer. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tls_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + Value *Arg = NewFunc->arg_begin(); + Arg->setName("size"); + Value *Result = BinaryOperator::CreateNeg(Arg, "result", BB); + ReturnInst::Create(M.getContext(), Result, BB); + if (Function *Intrinsic = M.getFunction("llvm.nacl.tp.tls.offset")) { + Intrinsic->replaceAllUsesWith(NewFunc); + Intrinsic->eraseFromParent(); + } +} + +bool ExpandTls::runOnModule(Module &M) { + ModulePass *Pass = createExpandTlsConstantExprPass(); + Pass->runOnModule(M); + delete Pass; + + std::vector<VarInfo> TlsVars; + PointerType *TemplatePtrType = buildTlsTemplate(M, &TlsVars); + rewriteTlsVars(M, &TlsVars, TemplatePtrType); + + defineTlsLayoutIntrinsics(M); + + return true; +} + +ModulePass *llvm::createExpandTlsPass() { + return new ExpandTls(); +} diff --git a/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp new file mode 100644 index 0000000000..328e5e08e6 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp @@ -0,0 +1,111 @@ +//===- ExpandTlsConstantExpr.cpp - Convert ConstantExprs to Instructions---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a helper used by the ExpandTls pass. +// +// LLVM treats the address of a TLS variable as a ConstantExpr. This +// is arguably a bug because the address of a TLS variable is *not* a +// constant: it varies between threads. 
+// +// See http://llvm.org/bugs/show_bug.cgi?id=14353 +// +// This is also a problem for the ExpandTls pass, which wants to use +// replaceUsesOfWith() to replace each TLS variable with an +// Instruction sequence that calls @llvm.nacl.read.tp(). This doesn't +// work if the TLS variable is used inside other ConstantExprs, +// because ConstantExprs are interned and are not associated with any +// function, whereas each Instruction must be part of a function. +// +// To fix that problem, this pass converts ConstantExprs that +// reference TLS variables into Instructions. +// +// For example, this use of a 'ptrtoint' ConstantExpr: +// +// ret i32 ptrtoint (i32* @tls_var to i32) +// +// is converted into this 'ptrtoint' Instruction: +// +// %expanded = ptrtoint i32* @tls_var to i32 +// ret i32 %expanded +// +//===----------------------------------------------------------------------===// + +#include <vector> + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandTlsConstantExpr : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTlsConstantExpr() : ModulePass(ID) { + initializeExpandTlsConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTlsConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandTlsConstantExpr, "nacl-expand-tls-constant-expr", + "Eliminate ConstantExpr references to TLS variables", + false, false) + +// This removes ConstantExpr references to the given Constant. +static void expandConstExpr(Constant *Expr) { + // First, ensure that ConstantExpr references to Expr are converted + // to Instructions so that we can modify them. + for (Value::use_iterator UI = Expr->use_begin(); + UI != Expr->use_end(); + ++UI) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) { + expandConstExpr(CE); + } + } + Expr->removeDeadConstantUsers(); + + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Expr)) { + while (!Expr->use_empty()) { + Use *U = &Expr->use_begin().getUse(); + Instruction *NewInst = CE->getAsInstruction(); + NewInst->insertBefore(PhiSafeInsertPt(U)); + NewInst->setName("expanded"); + PhiSafeReplaceUses(U, NewInst); + } + } +} + +bool ExpandTlsConstantExpr::runOnModule(Module &M) { + for (Module::alias_iterator Iter = M.alias_begin(); + Iter != M.alias_end(); ) { + GlobalAlias *GA = Iter++; + if (GA->isThreadDependent()) { + GA->replaceAllUsesWith(GA->getAliasee()); + GA->eraseFromParent(); + } + } + for (Module::global_iterator Global = M.global_begin(); + Global != M.global_end(); + ++Global) { + if (Global->isThreadLocal()) { + expandConstExpr(Global); + } + } + return true; +} + +ModulePass *llvm::createExpandTlsConstantExprPass() { + return new ExpandTlsConstantExpr(); +} diff --git a/lib/Transforms/NaCl/ExpandUtils.cpp b/lib/Transforms/NaCl/ExpandUtils.cpp new file mode 100644 index 0000000000..6914b6dd84 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandUtils.cpp @@ -0,0 +1,61 @@ +//===-- ExpandUtils.cpp - Helper functions for expansion passes -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +Instruction *llvm::PhiSafeInsertPt(Use *U) { + Instruction *InsertPt = cast<Instruction>(U->getUser()); + if (PHINode *PN = dyn_cast<PHINode>(InsertPt)) { + // We cannot insert instructions before a PHI node, so insert + // before the incoming block's terminator. This could be + // suboptimal if the terminator is a conditional. + InsertPt = PN->getIncomingBlock(*U)->getTerminator(); + } + return InsertPt; +} + +void llvm::PhiSafeReplaceUses(Use *U, Value *NewVal) { + if (PHINode *PN = dyn_cast<PHINode>(U->getUser())) { + // A PHI node can have multiple incoming edges from the same + // block, in which case all these edges must have the same + // incoming value. + BasicBlock *BB = PN->getIncomingBlock(*U); + for (unsigned I = 0; I < PN->getNumIncomingValues(); ++I) { + if (PN->getIncomingBlock(I) == BB) + PN->setIncomingValue(I, NewVal); + } + } else { + U->getUser()->replaceUsesOfWith(U->get(), NewVal); + } +} + +Instruction *llvm::CopyDebug(Instruction *NewInst, Instruction *Original) { + NewInst->setDebugLoc(Original->getDebugLoc()); + return NewInst; +} + +Function *llvm::RecreateFunction(Function *Func, FunctionType *NewType) { + Function *NewFunc = Function::Create(NewType, Func->getLinkage()); + NewFunc->copyAttributesFrom(Func); + Func->getParent()->getFunctionList().insert(Func, NewFunc); + NewFunc->takeName(Func); + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + Func->getBasicBlockList()); + Func->replaceAllUsesWith( + ConstantExpr::getBitCast(NewFunc, + Func->getFunctionType()->getPointerTo())); + return NewFunc; +} diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp new file mode 100644 index 0000000000..0fd1a3cb44 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp @@ -0,0 +1,338 @@ +//===- ExpandVarArgs.cpp - Expand out variable argument function calls-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out all use of variable argument functions. +// +// This pass replaces a varargs function call with a function call in +// which a pointer to the variable arguments is passed explicitly. +// The callee explicitly allocates space for the variable arguments on +// the stack using "alloca". +// +// Alignment: +// +// This pass does not add any alignment padding between the arguments +// that are copied onto the stack. We assume that the only argument +// types that need to be handled are 32-bit and 64-bit -- i32, i64, +// pointers and double: +// +// * We won't see i1, i8, i16 and float as varargs arguments because +// the C standard requires the compiler to promote these to the +// types "int" and "double". +// +// * We won't see va_arg instructions of struct type because Clang +// does not yet support them in PNaCl mode. See +// https://code.google.com/p/nativeclient/issues/detail?id=2381 +// +// If such arguments do appear in the input, this pass will generate +// correct, working code, but this code might be inefficient due to +// using unaligned memory accesses. 
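A rough source-level picture of the rewrite this pass performs may help; the names below are illustrative only, and the real pass packs the buffer as a packed struct:

    #include <cstdio>

    // What a variadic callee roughly becomes: the "..." is replaced by a
    // pointer to a caller-allocated argument buffer.
    struct VarArgsBuf { int a; int b; };

    static int sum_expanded(int n, void *varargs) {
      int total = 0;
      int *p = static_cast<int *>(varargs);
      for (int i = 0; i < n; ++i)
        total += p[i];            // each va_arg(ap, int) becomes a load plus a pointer bump
      return total;
    }

    int main() {
      VarArgsBuf buf = {3, 4};                      // caller copies the variable arguments
      std::printf("%d\n", sum_expanded(2, &buf));   // and passes the buffer's address; prints 7
      return 0;
    }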
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass recreates functions in + // order to change their argument lists. + class ExpandVarArgs : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandVarArgs() : ModulePass(ID) { + initializeExpandVarArgsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandVarArgs::ID = 0; +INITIALIZE_PASS(ExpandVarArgs, "expand-varargs", + "Expand out variable argument function definitions and calls", + false, false) + +static void ExpandVarArgFunc(Function *Func) { + Type *PtrType = Type::getInt8PtrTy(Func->getContext()); + + FunctionType *FTy = Func->getFunctionType(); + SmallVector<Type *, 8> Params(FTy->param_begin(), FTy->param_end()); + Params.push_back(PtrType); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + Function *NewFunc = RecreateFunction(Func, NFTy); + + // Declare the new argument as "noalias". + NewFunc->setAttributes( + Func->getAttributes().addAttribute( + Func->getContext(), FTy->getNumParams() + 1, Attribute::NoAlias)); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = Func->arg_begin(), E = Func->arg_end(), + NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + Arg->replaceAllUsesWith(NewArg); + NewArg->takeName(Arg); + } + + Func->eraseFromParent(); + + Value *VarArgsArg = --NewFunc->arg_end(); + VarArgsArg->setName("varargs"); + + // Expand out uses of llvm.va_start in this function. + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (VAStartInst *VAS = dyn_cast<VAStartInst>(Inst)) { + Value *Cast = CopyDebug(new BitCastInst(VAS->getArgList(), + PtrType->getPointerTo(), + "arglist", VAS), VAS); + CopyDebug(new StoreInst(VarArgsArg, Cast, VAS), VAS); + VAS->eraseFromParent(); + } + } + } +} + +static void ExpandVAArgInst(VAArgInst *Inst) { + // Read the argument. We assume that no realignment of the pointer + // is required. + Value *ArgList = CopyDebug(new BitCastInst( + Inst->getPointerOperand(), + Inst->getType()->getPointerTo()->getPointerTo(), "arglist", Inst), Inst); + Value *CurrentPtr = CopyDebug(new LoadInst(ArgList, "arglist_current", Inst), + Inst); + Value *Result = CopyDebug(new LoadInst(CurrentPtr, "va_arg", Inst), Inst); + Result->takeName(Inst); + + // Update the va_list to point to the next argument. + SmallVector<Value *, 1> Indexes; + Indexes.push_back(ConstantInt::get(Inst->getContext(), APInt(32, 1))); + Value *Next = CopyDebug(GetElementPtrInst::Create( + CurrentPtr, Indexes, "arglist_next", Inst), Inst); + CopyDebug(new StoreInst(Next, ArgList, Inst), Inst); + + Inst->replaceAllUsesWith(Result); + Inst->eraseFromParent(); +} + +static void ExpandVACopyInst(VACopyInst *Inst) { + // va_list may have more space reserved, but we only need to + // copy a single pointer. 
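Because the expanded va_list carries nothing but the current-argument pointer, va_copy reduces to a single pointer assignment. A minimal sketch with hypothetical names:

    // The expanded va_list is just "pointer to the next unread argument".
    typedef void *expanded_va_list;

    static inline void expanded_va_copy(expanded_va_list *dest,
                                        const expanded_va_list *src) {
      *dest = *src;   // copy the current pointer; any extra reserved space is ignored
    }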
+ Type *PtrTy = Type::getInt8PtrTy(Inst->getContext())->getPointerTo(); + Value *Src = CopyDebug(new BitCastInst(Inst->getSrc(), PtrTy, "vacopy_src", + Inst), Inst); + Value *Dest = CopyDebug(new BitCastInst(Inst->getDest(), PtrTy, "vacopy_dest", + Inst), Inst); + Value *CurrentPtr = CopyDebug(new LoadInst(Src, "vacopy_currentptr", Inst), + Inst); + CopyDebug(new StoreInst(CurrentPtr, Dest, Inst), Inst); + Inst->eraseFromParent(); +} + +static void LifetimeDecl(Intrinsic::ID id, Value *Ptr, Value *Size, + Instruction *InsertPt) { + Module *M = InsertPt->getParent()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, id); + SmallVector<Value *, 2> Args; + Args.push_back(Size); + Args.push_back(Ptr); + CallInst::Create(Func, Args, "", InsertPt); +} + +// CopyCall() uses argument overloading so that it can be used by the +// template ExpandVarArgCall(). +static CallInst *CopyCall(CallInst *Original, Value *Callee, + ArrayRef<Value*> Args) { + return CallInst::Create(Callee, Args, "", Original); +} + +static InvokeInst *CopyCall(InvokeInst *Original, Value *Callee, + ArrayRef<Value*> Args) { + return InvokeInst::Create(Callee, Original->getNormalDest(), + Original->getUnwindDest(), Args, "", Original); +} + +// ExpandVarArgCall() converts a CallInst or InvokeInst to expand out +// of varargs. It returns whether the module was modified. +template <class InstType> +static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) { + FunctionType *FuncType = cast<FunctionType>( + Call->getCalledValue()->getType()->getPointerElementType()); + if (!FuncType->isFunctionVarArg()) + return false; + + LLVMContext *Context = &Call->getContext(); + + SmallVector<AttributeSet, 8> Attrs; + Attrs.push_back(Call->getAttributes().getFnAttributes()); + Attrs.push_back(Call->getAttributes().getRetAttributes()); + + // Split argument list into fixed and variable arguments. + SmallVector<Value *, 8> FixedArgs; + SmallVector<Value *, 8> VarArgs; + SmallVector<Type *, 8> VarArgsTypes; + for (unsigned I = 0; I < FuncType->getNumParams(); ++I) { + FixedArgs.push_back(Call->getArgOperand(I)); + // AttributeSets use 1-based indexing. + Attrs.push_back(Call->getAttributes().getParamAttributes(I + 1)); + } + for (unsigned I = FuncType->getNumParams(); + I < Call->getNumArgOperands(); ++I) { + Value *ArgVal = Call->getArgOperand(I); + VarArgs.push_back(ArgVal); + if (Call->getAttributes().hasAttribute(I + 1, Attribute::ByVal)) { + // For "byval" arguments we must dereference the pointer. + VarArgsTypes.push_back(ArgVal->getType()->getPointerElementType()); + } else { + VarArgsTypes.push_back(ArgVal->getType()); + } + } + if (VarArgsTypes.size() == 0) { + // Some buggy code (e.g. 176.gcc in Spec2k) uses va_arg on an + // empty argument list, which gives undefined behaviour in C. To + // work around such programs, we create a dummy varargs buffer on + // the stack even though there are no arguments to put in it. + // This allows va_arg to read an undefined value from the stack + // rather than crashing by reading from an uninitialized pointer. + // An alternative would be to pass a null pointer to catch the + // invalid use of va_arg. + VarArgsTypes.push_back(Type::getInt32Ty(*Context)); + } + + // Create struct type for packing variable arguments into. We + // create this as packed for now and assume that no alignment + // padding is desired. + StructType *VarArgsTy = StructType::get(*Context, VarArgsTypes, true); + + // Allocate space for the variable argument buffer. 
Do this at the + // start of the function so that we don't leak space if the function + // is called in a loop. + Function *Func = Call->getParent()->getParent(); + Instruction *Buf = new AllocaInst(VarArgsTy, "vararg_buffer"); + Func->getEntryBlock().getInstList().push_front(Buf); + + // Call llvm.lifetime.start/end intrinsics to indicate that Buf is + // only used for the duration of the function call, so that the + // stack space can be reused elsewhere. + Type *I8Ptr = Type::getInt8Ty(*Context)->getPointerTo(); + Instruction *BufPtr = new BitCastInst(Buf, I8Ptr, "vararg_lifetime_bitcast"); + BufPtr->insertAfter(Buf); + Value *BufSize = ConstantInt::get(*Context, + APInt(64, DL->getTypeAllocSize(VarArgsTy))); + LifetimeDecl(Intrinsic::lifetime_start, BufPtr, BufSize, Call); + + // Copy variable arguments into buffer. + int Index = 0; + for (SmallVector<Value *, 8>::iterator Iter = VarArgs.begin(); + Iter != VarArgs.end(); + ++Iter, ++Index) { + SmallVector<Value *, 2> Indexes; + Indexes.push_back(ConstantInt::get(*Context, APInt(32, 0))); + Indexes.push_back(ConstantInt::get(*Context, APInt(32, Index))); + Value *Ptr = CopyDebug(GetElementPtrInst::Create( + Buf, Indexes, "vararg_ptr", Call), Call); + if (Call->getAttributes().hasAttribute( + FuncType->getNumParams() + Index + 1, Attribute::ByVal)) { + IRBuilder<> Builder(Call); + Builder.CreateMemCpy( + Ptr, *Iter, + DL->getTypeAllocSize((*Iter)->getType()->getPointerElementType()), + /* Align= */ 1); + } else { + CopyDebug(new StoreInst(*Iter, Ptr, Call), Call); + } + } + + // Cast function to new type to add our extra pointer argument. + SmallVector<Type *, 8> ArgTypes(FuncType->param_begin(), + FuncType->param_end()); + ArgTypes.push_back(VarArgsTy->getPointerTo()); + FunctionType *NFTy = FunctionType::get(FuncType->getReturnType(), + ArgTypes, false); + Value *CastFunc = + CopyDebug(new BitCastInst(Call->getCalledValue(), NFTy->getPointerTo(), + "vararg_func", Call), Call); + + // Create the converted function call. + FixedArgs.push_back(Buf); + InstType *NewCall = CopyCall(Call, CastFunc, FixedArgs); + CopyDebug(NewCall, Call); + NewCall->setAttributes(AttributeSet::get(Call->getContext(), Attrs)); + NewCall->takeName(Call); + + if (isa<CallInst>(Call)) { + LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, Call); + } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Call)) { + LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, + Invoke->getNormalDest()->getFirstInsertionPt()); + LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, + Invoke->getUnwindDest()->getFirstInsertionPt()); + } + + Call->replaceAllUsesWith(NewCall); + Call->eraseFromParent(); + + return true; +} + +bool ExpandVarArgs::runOnModule(Module &M) { + bool Changed = false; + DataLayout DL(&M); + + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = Iter++; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) { + Changed = true; + ExpandVAArgInst(VI); + } else if (isa<VAEndInst>(Inst)) { + // va_end() is a no-op in this implementation. 
+ Changed = true; + Inst->eraseFromParent(); + } else if (VACopyInst *VAC = dyn_cast<VACopyInst>(Inst)) { + Changed = true; + ExpandVACopyInst(VAC); + } else if (CallInst *Call = dyn_cast<CallInst>(Inst)) { + Changed |= ExpandVarArgCall(Call, &DL); + } else if (InvokeInst *Call = dyn_cast<InvokeInst>(Inst)) { + Changed |= ExpandVarArgCall(Call, &DL); + } + } + } + + if (Func->isVarArg()) { + Changed = true; + ExpandVarArgFunc(Func); + } + } + + return Changed; +} + +ModulePass *llvm::createExpandVarArgsPass() { + return new ExpandVarArgs(); +} diff --git a/lib/Transforms/NaCl/FlattenGlobals.cpp b/lib/Transforms/NaCl/FlattenGlobals.cpp new file mode 100644 index 0000000000..74ecda168e --- /dev/null +++ b/lib/Transforms/NaCl/FlattenGlobals.cpp @@ -0,0 +1,299 @@ +//===- FlattenGlobals.cpp - Flatten global variable initializers-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts initializers for global variables into a +// flattened normal form which removes nested struct types and +// simplifies ConstantExprs. +// +// In this normal form, an initializer is either a SimpleElement or a +// CompoundElement. +// +// A SimpleElement is one of the following: +// +// 1) An i8 array literal or zeroinitializer: +// +// [SIZE x i8] c"DATA" +// [SIZE x i8] zeroinitializer +// +// 2) A reference to a GlobalValue (a function or global variable) +// with an optional 32-bit byte offset added to it (the addend): +// +// ptrtoint (TYPE* @GLOBAL to i32) +// add (i32 ptrtoint (TYPE* @GLOBAL to i32), i32 ADDEND) +// +// We use ptrtoint+add rather than bitcast+getelementptr because +// the constructor for getelementptr ConstantExprs performs +// constant folding which introduces more complex getelementptrs, +// and it is hard to check that they follow a normal form. +// +// For completeness, the pass also allows a BlockAddress as well as +// a GlobalValue here, although BlockAddresses are currently not +// allowed in the PNaCl ABI, so this should not be considered part +// of the normal form. +// +// A CompoundElement is a unnamed, packed struct containing only +// SimpleElements. +// +// Limitations: +// +// LLVM IR allows ConstantExprs that calculate the difference between +// two globals' addresses. FlattenGlobals rejects these because Clang +// does not generate these and because ELF does not support such +// relocations in general. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // A FlattenedConstant represents a global variable initializer that + // has been flattened and may be converted into the normal form. + class FlattenedConstant { + LLVMContext *Context; + IntegerType *IntPtrType; + unsigned PtrSize; + + // A flattened global variable initializer is represented as: + // 1) an array of bytes; + unsigned BufSize; + uint8_t *Buf; + uint8_t *BufEnd; + + // 2) an array of relocations. + struct Reloc { + unsigned RelOffset; // Offset at which the relocation is to be applied. 
+ Constant *GlobalRef; + }; + typedef SmallVector<Reloc, 10> RelocArray; + RelocArray Relocs; + + void putAtDest(DataLayout *DL, Constant *Value, uint8_t *Dest); + + Constant *dataSlice(unsigned StartPos, unsigned EndPos) { + return ConstantDataArray::get( + *Context, ArrayRef<uint8_t>(Buf + StartPos, Buf + EndPos)); + } + + public: + FlattenedConstant(DataLayout *DL, Constant *Value): + Context(&Value->getContext()) { + IntPtrType = DL->getIntPtrType(*Context); + PtrSize = DL->getPointerSize(); + BufSize = DL->getTypeAllocSize(Value->getType()); + Buf = new uint8_t[BufSize]; + BufEnd = Buf + BufSize; + memset(Buf, 0, BufSize); + putAtDest(DL, Value, Buf); + } + + ~FlattenedConstant() { + delete[] Buf; + } + + Constant *getAsNormalFormConstant(); + }; + + class FlattenGlobals : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + FlattenGlobals() : ModulePass(ID) { + initializeFlattenGlobalsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +static void ExpandConstant(DataLayout *DL, Constant *Val, + Constant **ResultGlobal, uint64_t *ResultOffset) { + if (isa<GlobalValue>(Val) || isa<BlockAddress>(Val)) { + *ResultGlobal = Val; + *ResultOffset = 0; + } else if (isa<ConstantPointerNull>(Val)) { + *ResultGlobal = NULL; + *ResultOffset = 0; + } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + *ResultGlobal = NULL; + *ResultOffset = CI->getZExtValue(); + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val)) { + ExpandConstant(DL, CE->getOperand(0), ResultGlobal, ResultOffset); + if (CE->getOpcode() == Instruction::GetElementPtr) { + SmallVector<Value *, 8> Indexes(CE->op_begin() + 1, CE->op_end()); + *ResultOffset += DL->getIndexedOffset(CE->getOperand(0)->getType(), + Indexes); + } else if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::IntToPtr) { + // Nothing more to do. + } else if (CE->getOpcode() == Instruction::PtrToInt) { + if (Val->getType()->getIntegerBitWidth() < DL->getPointerSizeInBits()) { + errs() << "Not handled: " << *CE << "\n"; + report_fatal_error("FlattenGlobals: a ptrtoint that truncates " + "a pointer is not allowed"); + } + } else { + errs() << "Not handled: " << *CE << "\n"; + report_fatal_error( + std::string("FlattenGlobals: ConstantExpr opcode not handled: ") + + CE->getOpcodeName()); + } + } else { + errs() << "Not handled: " << *Val << "\n"; + report_fatal_error("FlattenGlobals: Constant type not handled for reloc"); + } +} + +void FlattenedConstant::putAtDest(DataLayout *DL, Constant *Val, + uint8_t *Dest) { + uint64_t ValSize = DL->getTypeAllocSize(Val->getType()); + assert(Dest + ValSize <= BufEnd); + if (isa<ConstantAggregateZero>(Val) || + isa<UndefValue>(Val) || + isa<ConstantPointerNull>(Val)) { + // The buffer is already zero-initialized. + } else if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { + memcpy(Dest, CI->getValue().getRawData(), ValSize); + } else if (ConstantFP *CF = dyn_cast<ConstantFP>(Val)) { + APInt Data = CF->getValueAPF().bitcastToAPInt(); + assert((Data.getBitWidth() + 7) / 8 == ValSize); + assert(Data.getBitWidth() % 8 == 0); + memcpy(Dest, Data.getRawData(), ValSize); + } else if (ConstantDataSequential *CD = + dyn_cast<ConstantDataSequential>(Val)) { + // Note that getRawDataValues() assumes the host endianness is the same. 
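The endianness caveat is easy to see in isolation: the flattener memcpy()s the constant's raw words into the byte buffer, so the emitted array follows the host's byte order. A small stand-alone sketch of the same idea:

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    int main() {
      uint8_t buf[4] = {0};
      uint32_t field = 0x11223344;              // an i32 field of some initializer
      std::memcpy(buf, &field, sizeof(field));  // same idea as putAtDest()
      for (unsigned i = 0; i < sizeof(buf); ++i)
        std::printf("%02x ", buf[i]);           // "44 33 22 11" on a little-endian host
      std::printf("\n");
      return 0;
    }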
+ StringRef Data = CD->getRawDataValues(); + assert(Data.size() == ValSize); + memcpy(Dest, Data.data(), Data.size()); + } else if (isa<ConstantArray>(Val) || isa<ConstantVector>(Val)) { + uint64_t ElementSize = DL->getTypeAllocSize( + Val->getType()->getSequentialElementType()); + for (unsigned I = 0; I < Val->getNumOperands(); ++I) { + putAtDest(DL, cast<Constant>(Val->getOperand(I)), Dest + ElementSize * I); + } + } else if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Val)) { + const StructLayout *Layout = DL->getStructLayout(CS->getType()); + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + putAtDest(DL, CS->getOperand(I), Dest + Layout->getElementOffset(I)); + } + } else { + Constant *GV; + uint64_t Offset; + ExpandConstant(DL, Val, &GV, &Offset); + if (GV) { + Constant *NewVal = ConstantExpr::getPtrToInt(GV, IntPtrType); + if (Offset) { + // For simplicity, require addends to be 32-bit. + if ((int64_t) Offset != (int32_t) (uint32_t) Offset) { + errs() << "Not handled: " << *Val << "\n"; + report_fatal_error( + "FlattenGlobals: Offset does not fit into 32 bits"); + } + NewVal = ConstantExpr::getAdd( + NewVal, ConstantInt::get(IntPtrType, Offset, /* isSigned= */ true)); + } + Reloc NewRel = { Dest - Buf, NewVal }; + Relocs.push_back(NewRel); + } else { + memcpy(Dest, &Offset, ValSize); + } + } +} + +Constant *FlattenedConstant::getAsNormalFormConstant() { + // Return a single SimpleElement. + if (Relocs.size() == 0) + return dataSlice(0, BufSize); + if (Relocs.size() == 1 && BufSize == PtrSize) { + assert(Relocs[0].RelOffset == 0); + return Relocs[0].GlobalRef; + } + + // Return a CompoundElement. + SmallVector<Constant *, 10> Elements; + unsigned PrevPos = 0; + for (RelocArray::iterator Rel = Relocs.begin(), E = Relocs.end(); + Rel != E; ++Rel) { + if (Rel->RelOffset > PrevPos) + Elements.push_back(dataSlice(PrevPos, Rel->RelOffset)); + Elements.push_back(Rel->GlobalRef); + PrevPos = Rel->RelOffset + PtrSize; + } + if (PrevPos < BufSize) + Elements.push_back(dataSlice(PrevPos, BufSize)); + return ConstantStruct::getAnon(*Context, Elements, true); +} + +char FlattenGlobals::ID = 0; +INITIALIZE_PASS(FlattenGlobals, "flatten-globals", + "Flatten global variable initializers into byte arrays", + false, false) + +bool FlattenGlobals::runOnModule(Module &M) { + bool Modified = false; + DataLayout DL(&M); + Type *I8 = Type::getInt8Ty(M.getContext()); + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *Global = I++; + // Variables with "appending" linkage must always be arrays and so + // cannot be normalized, so leave them alone. + if (Global->hasAppendingLinkage()) + continue; + Modified = true; + + Type *GlobalType = Global->getType()->getPointerElementType(); + uint64_t Size = DL.getTypeAllocSize(GlobalType); + Constant *NewInit; + Type *NewType; + if (Global->hasInitializer()) { + if (Global->getInitializer()->isNullValue()) { + // As an optimization, for large BSS variables, avoid + // allocating a buffer that would only be filled with zeroes. 
+ NewType = ArrayType::get(I8, Size); + NewInit = ConstantAggregateZero::get(NewType); + } else { + FlattenedConstant Buffer(&DL, Global->getInitializer()); + NewInit = Buffer.getAsNormalFormConstant(); + NewType = NewInit->getType(); + } + } else { + NewInit = NULL; + NewType = ArrayType::get(I8, Size); + } + GlobalVariable *NewGlobal = new GlobalVariable( + M, NewType, + Global->isConstant(), + Global->getLinkage(), + NewInit, "", Global, + Global->getThreadLocalMode()); + NewGlobal->copyAttributesFrom(Global); + if (NewGlobal->getAlignment() == 0) + NewGlobal->setAlignment(DL.getPrefTypeAlignment(GlobalType)); + NewGlobal->setExternallyInitialized(Global->isExternallyInitialized()); + NewGlobal->takeName(Global); + Global->replaceAllUsesWith( + ConstantExpr::getBitCast(NewGlobal, Global->getType())); + Global->eraseFromParent(); + } + return Modified; + +} + +ModulePass *llvm::createFlattenGlobalsPass() { + return new FlattenGlobals(); +} diff --git a/lib/Transforms/NaCl/GlobalCleanup.cpp b/lib/Transforms/NaCl/GlobalCleanup.cpp new file mode 100644 index 0000000000..55886f8f7d --- /dev/null +++ b/lib/Transforms/NaCl/GlobalCleanup.cpp @@ -0,0 +1,118 @@ +//===- GlobalCleanup.cpp - Cleanup global symbols post-bitcode-link -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// ===---------------------------------------------------------------------===// +// +// PNaCl executables should have no external symbols or aliases. These passes +// internalize (or otherwise remove/resolve) GlobalValues and resolve all +// GlobalAliases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class GlobalCleanup : public ModulePass { + public: + static char ID; + GlobalCleanup() : ModulePass(ID) { + initializeGlobalCleanupPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); + }; + + class ResolveAliases : public ModulePass { + public: + static char ID; + ResolveAliases() : ModulePass(ID) { + initializeResolveAliasesPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); + }; +} + +char GlobalCleanup::ID = 0; +INITIALIZE_PASS(GlobalCleanup, "nacl-global-cleanup", + "GlobalValue cleanup for PNaCl", false, false) + +static bool CleanUpLinkage(GlobalValue *GV) { + // TODO(dschuff): handle the rest of the linkage types as necessary without + // running afoul of the IR verifier or breaking the native link + switch (GV->getLinkage()) { + case GlobalValue::ExternalWeakLinkage: { + Constant *NullRef = Constant::getNullValue(GV->getType()); + GV->replaceAllUsesWith(NullRef); + GV->eraseFromParent(); + return true; + } + case GlobalValue::WeakAnyLinkage: { + GV->setLinkage(GlobalValue::InternalLinkage); + return true; + } + default: + // default with fall through to avoid compiler warning + return false; + } + return false; +} + +bool GlobalCleanup::runOnModule(Module &M) { + bool Modified = false; + + if (GlobalVariable *GV = M.getNamedGlobal("llvm.compiler.used")) { + GV->eraseFromParent(); + Modified = true; + } + if (GlobalVariable *GV = M.getNamedGlobal("llvm.used")) { + GV->eraseFromParent(); + Modified = true; + } + + for (Module::global_iterator I = 
M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *GV = I++; + Modified |= CleanUpLinkage(GV); + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function *F = I++; + Modified |= CleanUpLinkage(F); + } + return Modified; +} + +ModulePass *llvm::createGlobalCleanupPass() { + return new GlobalCleanup(); +} + +char ResolveAliases::ID = 0; +INITIALIZE_PASS(ResolveAliases, "resolve-aliases", + "resolve global variable and function aliases", false, false) + +bool ResolveAliases::runOnModule(Module &M) { + bool Modified = false; + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ) { + GlobalAlias *Alias = I++; + Alias->replaceAllUsesWith(Alias->getAliasee()); + Alias->eraseFromParent(); + Modified = true; + } + return Modified; +} + +ModulePass *llvm::createResolveAliasesPass() { + return new ResolveAliases(); +} diff --git a/lib/Transforms/NaCl/InsertDivideCheck.cpp b/lib/Transforms/NaCl/InsertDivideCheck.cpp new file mode 100644 index 0000000000..f1deae618f --- /dev/null +++ b/lib/Transforms/NaCl/InsertDivideCheck.cpp @@ -0,0 +1,112 @@ +//===- InsertDivideCheck.cpp - Add divide by zero checks ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds a check for divide by zero before every integer DIV or REM. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "add-divide-check" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class InsertDivideCheck : public FunctionPass { + public: + static char ID; + InsertDivideCheck() : FunctionPass(ID) { + initializeInsertDivideCheckPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + }; +} + +static BasicBlock *CreateTrapBlock(Function &F, DebugLoc dl) { + BasicBlock *TrapBlock = BasicBlock::Create(F.getContext(), "divrem.by.zero", + &F); + Value *TrapFn = Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap); + CallInst::Create(TrapFn, "", TrapBlock)->setDebugLoc(dl); + (new UnreachableInst(F.getContext(), TrapBlock))->setDebugLoc(dl); + return TrapBlock; +} + +bool InsertDivideCheck::runOnFunction(Function &F) { + SmallPtrSet<Instruction*, 8> GuardedDivs; + // If the pass finds a DIV/REM that needs to be checked for zero denominator, + // it will insert a new "trap" block, and split the block that contains the + // DIV/REM into two blocks. The new BasicBlocks are added after the current + // BasicBlock, so that if there is more than one DIV/REM in the same block, + // all are visited. 
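At the source level, the transformation just described amounts to guarding each divide or remainder with an explicit zero test that traps. A minimal sketch, with GCC/Clang's __builtin_trap standing in for llvm.trap:

    #include <cstdio>

    static int guarded_sdiv(int num, int den) {
      if (den == 0)        // the inserted compare + branch
        __builtin_trap();  // the new "divrem.by.zero" block: trap + unreachable
      return num / den;    // the original divide, now in the "guarded.divrem" block
    }

    int main() {
      std::printf("%d\n", guarded_sdiv(10, 3));  // prints 3
      return 0;
    }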
+ for (Function::iterator I = F.begin(); I != F.end(); I++) { + BasicBlock *BB = I; + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; BI++) { + BinaryOperator *DivInst = dyn_cast<BinaryOperator>(BI); + if (!DivInst || (GuardedDivs.count(DivInst) != 0)) + continue; + unsigned Opcode = DivInst->getOpcode(); + if (Opcode != Instruction::SDiv && Opcode != Instruction::UDiv && + Opcode != Instruction::SRem && Opcode != Instruction::URem) + continue; + Value *Denominator = DivInst->getOperand(1); + if (!Denominator->getType()->isIntegerTy()) + continue; + DebugLoc dl = DivInst->getDebugLoc(); + if (ConstantInt *DenomConst = dyn_cast<ConstantInt>(Denominator)) { + // Divides by constants do not need a denominator test. + if (DenomConst->isZero()) { + // For explicit divides by zero, insert a trap before DIV/REM + Value *TrapFn = Intrinsic::getDeclaration(F.getParent(), + Intrinsic::trap); + CallInst::Create(TrapFn, "", DivInst)->setDebugLoc(dl); + } + continue; + } + // Create a trap block. + BasicBlock *TrapBlock = CreateTrapBlock(F, dl); + // Move instructions in BB from DivInst to BB's end to a new block. + BasicBlock *Successor = BB->splitBasicBlock(BI, "guarded.divrem"); + // Remove the unconditional branch inserted by splitBasicBlock. + BB->getTerminator()->eraseFromParent(); + // Remember that DivInst was already processed, so that when we process + // inserted blocks later, we do not attempt to again guard it. + GuardedDivs.insert(DivInst); + // Compare the denominator with zero. + Value *Zero = ConstantInt::get(Denominator->getType(), 0); + Value *DenomIsZero = new ICmpInst(*BB, ICmpInst::ICMP_EQ, Denominator, + Zero, ""); + // Put in a condbranch to the trap block. + BranchInst::Create(TrapBlock, Successor, DenomIsZero, BB); + // BI is invalidated when we split. Stop the BasicBlock iterator. + break; + } + } + + return false; +} + +char InsertDivideCheck::ID = 0; +INITIALIZE_PASS(InsertDivideCheck, "insert-divide-check", + "Insert divide by zero checks", false, false) + +FunctionPass *llvm::createInsertDivideCheckPass() { + return new InsertDivideCheck(); +} diff --git a/lib/Transforms/NaCl/LLVMBuild.txt b/lib/Transforms/NaCl/LLVMBuild.txt new file mode 100644 index 0000000000..051a0d30ed --- /dev/null +++ b/lib/Transforms/NaCl/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/NaCl/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClTransforms +parent = Transforms +library_name = NaClTransforms +required_libraries = Core Support IPO diff --git a/lib/Transforms/NaCl/Makefile b/lib/Transforms/NaCl/Makefile new file mode 100644 index 0000000000..f297b753d7 --- /dev/null +++ b/lib/Transforms/NaCl/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/NaCl/Makefile-------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMNaClTransforms +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp new file mode 100644 index 0000000000..f8f78135a8 --- /dev/null +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -0,0 +1,111 @@ +//===-- PNaClABISimplify.cpp - Lists PNaCl ABI simplification passes ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the meta-passes "-pnacl-abi-simplify-preopt" +// and "-pnacl-abi-simplify-postopt". It lists their constituent +// passes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/NaCl.h" +#include "llvm/PassManager.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +void llvm::PNaClABISimplifyAddPreOptPasses(PassManager &PM) { + // Internalize all symbols in the module except _start, which is the only + // symbol a stable PNaCl pexe is allowed to export. + const char *SymbolsToPreserve[] = { "_start" }; + PM.add(createInternalizePass(SymbolsToPreserve)); + + // LowerExpect converts Intrinsic::expect into branch weights, + // which can then be removed after BlockPlacement. + PM.add(createLowerExpectIntrinsicPass()); + // Rewrite unsupported intrinsics to simpler constructs. + PM.add(createRewriteLLVMIntrinsicsPass()); + // LowerInvoke prevents use of C++ exception handling, which is not + // yet supported in the PNaCl ABI. + PM.add(createLowerInvokePass()); + // Remove landingpad blocks made unreachable by LowerInvoke. + PM.add(createCFGSimplificationPass()); + + // Expand out some uses of struct types. + PM.add(createExpandArithWithOverflowPass()); + // ExpandStructRegs must be run after ExpandArithWithOverflow to + // expand out the insertvalue instructions that + // ExpandArithWithOverflow introduces. + PM.add(createExpandStructRegsPass()); + + PM.add(createExpandVarArgsPass()); + PM.add(createExpandCtorsPass()); + PM.add(createResolveAliasesPass()); + PM.add(createExpandTlsPass()); + // GlobalCleanup needs to run after ExpandTls because + // __tls_template_start etc. are extern_weak before expansion + PM.add(createGlobalCleanupPass()); +} + +void llvm::PNaClABISimplifyAddPostOptPasses(PassManager &PM) { + PM.add(createRewritePNaClLibraryCallsPass()); + + // We place ExpandByVal after optimization passes because some byval + // arguments can be expanded away by the ArgPromotion pass. Leaving + // in "byval" during optimization also allows some dead stores to be + // eliminated, because "byval" is a stronger constraint than what + // ExpandByVal expands it to. + PM.add(createExpandByValPass()); + + // We place ExpandSmallArguments after optimization passes because + // some optimizations undo its changes. Note that + // ExpandSmallArguments requires that ExpandVarArgs has already been + // run. + PM.add(createExpandSmallArgumentsPass()); + + PM.add(createPromoteI1OpsPass()); + + // Optimization passes and ExpandByVal introduce + // memset/memcpy/memmove intrinsics with a 64-bit size argument. + // This pass converts those arguments to 32-bit. 
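For context, a sketch of how these two meta-pass builders are typically driven from a tool, assuming only the headers this file already uses plus llvm/IR/Module.h:

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/NaCl.h"

    using namespace llvm;

    static void runPNaClABISimplify(Module &M) {
      PassManager PM;
      PNaClABISimplifyAddPreOptPasses(PM);    // the -pnacl-abi-simplify-preopt list
      // ...the usual mid-level optimization passes would be added here...
      PNaClABISimplifyAddPostOptPasses(PM);   // the -pnacl-abi-simplify-postopt list
      PM.run(M);
    }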
+ PM.add(createCanonicalizeMemIntrinsicsPass()); + + // We place StripMetadata after optimization passes because + // optimizations depend on the metadata. + PM.add(createStripMetadataPass()); + + // FlattenGlobals introduces ConstantExpr bitcasts of globals which + // are expanded out later. + PM.add(createFlattenGlobalsPass()); + + // We should not place arbitrary passes after ExpandConstantExpr + // because they might reintroduce ConstantExprs. + PM.add(createExpandConstantExprPass()); + // PromoteIntegersPass does not handle constexprs and creates GEPs, + // so it goes between those passes. + PM.add(createPromoteIntegersPass()); + // ExpandGetElementPtr must follow ExpandConstantExpr to expand the + // getelementptr instructions it creates. + PM.add(createExpandGetElementPtrPass()); + // ReplacePtrsWithInts assumes that getelementptr instructions and + // ConstantExprs have already been expanded out. + PM.add(createReplacePtrsWithIntsPass()); + + // We place StripAttributes after optimization passes because many + // analyses add attributes to reflect their results. + // StripAttributes must come after ExpandByVal and + // ExpandSmallArguments. + PM.add(createStripAttributesPass()); + + // Strip dead prototytes to appease the intrinsic ABI checks. + // ExpandVarArgs leaves around vararg intrinsics, and + // ReplacePtrsWithInts leaves the lifetime.start/end intrinsics. + PM.add(createStripDeadPrototypesPass()); +} diff --git a/lib/Transforms/NaCl/PromoteI1Ops.cpp b/lib/Transforms/NaCl/PromoteI1Ops.cpp new file mode 100644 index 0000000000..dccf081e26 --- /dev/null +++ b/lib/Transforms/NaCl/PromoteI1Ops.cpp @@ -0,0 +1,138 @@ +//===- PromoteI1Ops.cpp - Promote various operations on the i1 type--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out various operations on the i1 type so that +// these i1 operations do not need to be supported by the PNaCl +// translator. +// +// This is similar to the PromoteIntegers pass in that it removes uses +// of an unusual-size integer type. The difference is that i1 remains +// a valid type in other operations. i1 can still be used in phi +// nodes, "select" instructions, in "sext" and "zext", and so on. In +// contrast, the integer types that PromoteIntegers removes are not +// allowed in any context by PNaCl's ABI verifier. +// +// This pass expands out the following: +// +// * i1 loads and stores. +// * All i1 comparisons and arithmetic operations, with the exception +// of "and", "or" and "xor", because these are used in practice and +// don't overflow. +// +// "switch" instructions on i1 are also disallowed by the PNaCl ABI +// verifier, but they don't seem to be generated in practice and so +// they are not currently expanded out by this pass. 
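In source terms, promoting an i1 operation means widening both operands to i8, doing the arithmetic there, and truncating the result back to one bit. A small sketch:

    #include <cstdint>
    #include <cstdio>

    static bool add_i1(bool a, bool b) {
      uint8_t wa = a;          // zext i1 -> i8
      uint8_t wb = b;
      uint8_t sum = wa + wb;   // the add now happens on a legal i8
      return sum & 1;          // trunc i8 -> i1
    }

    int main() {
      std::printf("%d\n", add_i1(true, true));  // prints 0: an i1 add wraps mod 2
      return 0;
    }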
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class PromoteI1Ops : public BasicBlockPass { + public: + static char ID; // Pass identification, replacement for typeid + PromoteI1Ops() : BasicBlockPass(ID) { + initializePromoteI1OpsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnBasicBlock(BasicBlock &BB); + }; +} + +char PromoteI1Ops::ID = 0; +INITIALIZE_PASS(PromoteI1Ops, "nacl-promote-i1-ops", + "Promote various operations on the i1 type", + false, false) + +static Value *promoteValue(Value *Val, bool SignExt, Instruction *InsertPt) { + Instruction::CastOps CastType = + SignExt ? Instruction::SExt : Instruction::ZExt; + return CopyDebug(CastInst::Create(CastType, Val, + Type::getInt8Ty(Val->getContext()), + Val->getName() + ".expand_i1_val", + InsertPt), InsertPt); +} + +bool PromoteI1Ops::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + + Type *I1Ty = Type::getInt1Ty(BB.getContext()); + Type *I8Ty = Type::getInt8Ty(BB.getContext()); + + for (BasicBlock::iterator Iter = BB.begin(), E = BB.end(); Iter != E; ) { + Instruction *Inst = Iter++; + if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + if (Load->getType() == I1Ty) { + Changed = true; + Value *Ptr = CopyDebug( + new BitCastInst( + Load->getPointerOperand(), I8Ty->getPointerTo(), + Load->getPointerOperand()->getName() + ".i8ptr", Load), Load); + LoadInst *NewLoad = new LoadInst( + Ptr, Load->getName() + ".pre_trunc", Load); + CopyDebug(NewLoad, Load); + CopyLoadOrStoreAttrs(NewLoad, Load); + Value *Result = CopyDebug(new TruncInst(NewLoad, I1Ty, "", Load), Load); + Result->takeName(Load); + Load->replaceAllUsesWith(Result); + Load->eraseFromParent(); + } + } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { + if (Store->getValueOperand()->getType() == I1Ty) { + Changed = true; + Value *Ptr = CopyDebug( + new BitCastInst( + Store->getPointerOperand(), I8Ty->getPointerTo(), + Store->getPointerOperand()->getName() + ".i8ptr", Store), + Store); + Value *Val = promoteValue(Store->getValueOperand(), false, Store); + StoreInst *NewStore = new StoreInst(Val, Ptr, Store); + CopyDebug(NewStore, Store); + CopyLoadOrStoreAttrs(NewStore, Store); + Store->eraseFromParent(); + } + } else if (BinaryOperator *Op = dyn_cast<BinaryOperator>(Inst)) { + if (Op->getType() == I1Ty && + !(Op->getOpcode() == Instruction::And || + Op->getOpcode() == Instruction::Or || + Op->getOpcode() == Instruction::Xor)) { + Value *Arg1 = promoteValue(Op->getOperand(0), false, Op); + Value *Arg2 = promoteValue(Op->getOperand(1), false, Op); + Value *NewOp = CopyDebug( + BinaryOperator::Create( + Op->getOpcode(), Arg1, Arg2, + Op->getName() + ".pre_trunc", Op), Op); + Value *Result = CopyDebug(new TruncInst(NewOp, I1Ty, "", Op), Op); + Result->takeName(Op); + Op->replaceAllUsesWith(Result); + Op->eraseFromParent(); + } + } else if (ICmpInst *Op = dyn_cast<ICmpInst>(Inst)) { + if (Op->getOperand(0)->getType() == I1Ty) { + Value *Arg1 = promoteValue(Op->getOperand(0), Op->isSigned(), Op); + Value *Arg2 = promoteValue(Op->getOperand(1), Op->isSigned(), Op); + Value *Result = CopyDebug( + new ICmpInst(Op, Op->getPredicate(), Arg1, Arg2, ""), Op); + Result->takeName(Op); + Op->replaceAllUsesWith(Result); + Op->eraseFromParent(); + } + } + } + return Changed; +} + +BasicBlockPass *llvm::createPromoteI1OpsPass() { + return new 
PromoteI1Ops(); +} diff --git a/lib/Transforms/NaCl/PromoteIntegers.cpp b/lib/Transforms/NaCl/PromoteIntegers.cpp new file mode 100644 index 0000000000..017e4976f2 --- /dev/null +++ b/lib/Transforms/NaCl/PromoteIntegers.cpp @@ -0,0 +1,653 @@ +//===- PromoteIntegers.cpp - Promote illegal integers for PNaCl ABI -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// A limited set of transformations to promote illegal-sized int types. +// +//===----------------------------------------------------------------------===// +// +// Legal sizes are currently 1, 8, 16, 32, 64 (and higher, see note below) +// Operations on illegal integers and int pointers are be changed to operate +// on the next-higher legal size. +// It always maintains the invariant that the upper bits (above the size of the +// original type) are zero; therefore after operations which can overwrite these +// bits (e.g. add, shl, sext), the bits are cleared. +// +// Limitations: +// 1) It can't change function signatures or global variables +// 2) It won't promote (and can't expand) types larger than i64 +// 3) Doesn't support mul/div operators +// 4) Doesn't handle arrays or structs (or GEPs) with illegal types +// 5) Doesn't handle constant expressions +// +//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Pass.h" +#include "llvm/Support/IntegersSubset.h" +#include "llvm/Support/IntegersSubsetMapping.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class PromoteIntegers : public FunctionPass { + public: + static char ID; + PromoteIntegers() : FunctionPass(ID) { + initializePromoteIntegersPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); +}; +} + +char PromoteIntegers::ID = 0; +INITIALIZE_PASS(PromoteIntegers, "nacl-promote-ints", + "Promote integer types which are illegal in PNaCl", + false, false) + +// Legal sizes are currently 1, 8, 16, 32, and 64. +// We can't yet expand types above 64 bit, so don't try to touch them for now. +// TODO(dschuff): expand >64bit types or disallow >64bit packed bitfields. +// There are currently none in our tests that use the ABI checker. +// See https://code.google.com/p/nativeclient/issues/detail?id=3360 +static bool isLegalSize(unsigned Size) { + if (Size > 64) return true; + return Size == 1 || Size == 8 || Size == 16 || Size == 32 || Size == 64; +} + +static Type *getPromotedIntType(IntegerType *Ty) { + unsigned Width = Ty->getBitWidth(); + assert(Width <= 64 && "Don't know how to legalize >64 bit types yet"); + if (isLegalSize(Width)) + return Ty; + return IntegerType::get(Ty->getContext(), + Width < 8 ? 8 : NextPowerOf2(Width)); +} + +// Return a legal integer or pointer-to-integer type, promoting to a larger +// size if necessary. 
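The promotion rule used by getPromotedIntType above is small enough to restate in isolation: widths under 8 become 8, and anything else that is not already legal rounds up to the next power of two. A stand-alone sketch:

    #include <cstdio>

    static unsigned promotedWidth(unsigned Width) {
      switch (Width) {
      case 1: case 8: case 16: case 32: case 64:
        return Width;                 // already legal
      }
      if (Width < 8)
        return 8;
      unsigned P = 8;
      while (P < Width)
        P <<= 1;                      // next power of two
      return P;
    }

    int main() {
      std::printf("%u %u %u\n", promotedWidth(5), promotedWidth(17), promotedWidth(33));
      // prints: 8 32 64
      return 0;
    }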
+static Type *getPromotedType(Type *Ty) { + assert((isa<IntegerType>(Ty) || + (isa<PointerType>(Ty) && isa<IntegerType>(Ty->getContainedType(0)))) + && "Trying to convert a non-integer type"); + + if (isa<PointerType>(Ty)) + return getPromotedIntType( + cast<IntegerType>(Ty->getContainedType(0)))->getPointerTo(); + + return getPromotedIntType(cast<IntegerType>(Ty)); +} + +// Return true if Val is an int or pointer-to-int which should be converted. +static bool shouldConvert(Value *Val) { + Type *Ty = Val->getType(); + if (PointerType *Pty = dyn_cast<PointerType>(Ty)) + Ty = Pty->getContainedType(0); + if (IntegerType *ITy = dyn_cast<IntegerType>(Ty)) { + if (!isLegalSize(ITy->getBitWidth())) { + return true; + } + } + return false; +} + +// Return a constant which has been promoted to a legal size. +static Value *convertConstant(Constant *C, bool SignExt=false) { + assert(shouldConvert(C)); + if (isa<UndefValue>(C)) { + return UndefValue::get(getPromotedType(C->getType())); + } else if (ConstantInt *CInt = dyn_cast<ConstantInt>(C)) { + return ConstantInt::get( + getPromotedType(C->getType()), + SignExt ? CInt->getSExtValue() : CInt->getZExtValue(), + /*isSigned=*/SignExt); + } else { + errs() << "Value: " << *C << "\n"; + report_fatal_error("Unexpected constant value"); + } +} + +// Holds the state for converting/replacing values. Conversion is done in one +// pass, with each value requiring conversion possibly having two stages. When +// an instruction needs to be replaced (i.e. it has illegal operands or result) +// a new instruction is created, and the pass calls getConverted to get its +// operands. If the original operand has already been converted, the new value +// is returned. Otherwise, a placeholder is created and used in the new +// instruction. After a new instruction is created to replace an illegal one, +// recordConverted is called to register the replacement. All users are updated, +// and if there is a placeholder, its users are also updated. +// recordConverted also queues the old value for deletion. +// This strategy avoids the need for recursion or worklists for conversion. +class ConversionState { + public: + // Return the promoted value for Val. If Val has not yet been converted, + // return a placeholder, which will be converted later. + Value *getConverted(Value *Val) { + if (!shouldConvert(Val)) + return Val; + if (isa<GlobalVariable>(Val)) + report_fatal_error("Can't convert illegal GlobalVariables"); + if (RewrittenMap.count(Val)) + return RewrittenMap[Val]; + Value *P; + // Directly convert constants. + if (Constant *C = dyn_cast<Constant>(Val)) { + return convertConstant(C, /*SignExt=*/false); + } else { + // No converted value available yet, so create a placeholder. + P = new Argument(getPromotedType(Val->getType())); + } + RewrittenMap[Val] = P; + Placeholders[Val] = P; + return P; + } + + // Replace the uses of From with To, replace the uses of any + // placeholders for From, and optionally give From's name to To. + // Also mark To for deletion. + void recordConverted(Instruction *From, Value *To, bool TakeName=true) { + ToErase.push_back(From); + if (!shouldConvert(From)) { + // From does not produce an illegal value, update its users in place. + From->replaceAllUsesWith(To); + } else { + // From produces an illegal value, so its users will be replaced. When + // replacements are created they will use values returned by getConverted. + if (Placeholders.count(From)) { + // Users of the placeholder can be updated in place. 
+ Placeholders[From]->replaceAllUsesWith(To); + Placeholders.erase(From); + } + RewrittenMap[From] = To; + } + if (TakeName) { + To->takeName(From); + } + } + + void eraseReplacedInstructions() { + for (SmallVectorImpl<Instruction *>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + (*I)->dropAllReferences(); + for (SmallVectorImpl<Instruction *>::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + (*I)->eraseFromParent(); + } + + private: + // Maps illegal values to their new converted values (or placeholders + // if no new value is available yet) + DenseMap<Value *, Value *> RewrittenMap; + // Maps illegal values with no conversion available yet to their placeholders + DenseMap<Value *, Value *> Placeholders; + // Illegal values which have already been converted, will be erased. + SmallVector<Instruction *, 8> ToErase; +}; + +// Split an illegal load into multiple legal loads and return the resulting +// promoted value. The size of the load is assumed to be a multiple of 8. +static Value *splitLoad(LoadInst *Inst, ConversionState &State) { + if (Inst->isVolatile() || Inst->isAtomic()) + report_fatal_error("Can't split volatile/atomic loads"); + if (cast<IntegerType>(Inst->getType())->getBitWidth() % 8 != 0) + report_fatal_error("Loads must be a multiple of 8 bits"); + + Value *OrigPtr = State.getConverted(Inst->getPointerOperand()); + // OrigPtr is a placeholder in recursive calls, and so has no name + if (OrigPtr->getName().empty()) + OrigPtr->setName(Inst->getPointerOperand()->getName()); + unsigned Width = cast<IntegerType>(Inst->getType())->getBitWidth(); + Type *NewType = getPromotedType(Inst->getType()); + unsigned LoWidth = Width; + + while (!isLegalSize(LoWidth)) LoWidth -= 8; + IntegerType *LoType = IntegerType::get(Inst->getContext(), LoWidth); + IntegerType *HiType = IntegerType::get(Inst->getContext(), Width - LoWidth); + IRBuilder<> IRB(Inst->getParent(), Inst); + + Value *BCLo = IRB.CreateBitCast( + OrigPtr, + LoType->getPointerTo(), + OrigPtr->getName() + ".loty"); + Value *LoadLo = IRB.CreateAlignedLoad( + BCLo, Inst->getAlignment(), Inst->getName() + ".lo"); + Value *LoExt = IRB.CreateZExt(LoadLo, NewType, LoadLo->getName() + ".ext"); + Value *GEPHi = IRB.CreateConstGEP1_32(BCLo, 1, OrigPtr->getName() + ".hi"); + Value *BCHi = IRB.CreateBitCast( + GEPHi, + HiType->getPointerTo(), + OrigPtr->getName() + ".hity"); + + Value *LoadHi = IRB.CreateLoad(BCHi, Inst->getName() + ".hi"); + if (!isLegalSize(Width - LoWidth)) { + LoadHi = splitLoad(cast<LoadInst>(LoadHi), State); + // BCHi was still illegal, and has been replaced with a placeholder in the + // recursive call. Since it is redundant with BCLo in the recursive call, + // just splice it out entirely. 
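A byte-level view of the split may make the recursion easier to follow: an illegal i24 load becomes a legal 16-bit load of the low bits plus an 8-bit load of the high bits, zero-extended, shifted, and or'ed together. A stand-alone sketch (i24 is a hypothetical illegal width; little-endian output shown):

    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    static uint32_t load_i24(const uint8_t *p) {
      uint16_t lo;
      std::memcpy(&lo, p, sizeof(lo));          // the legal 16-bit load ("loty")
      uint8_t hi = p[2];                        // the legal 8-bit load ("hity")
      return static_cast<uint32_t>(lo)          // zext of the low part
           | (static_cast<uint32_t>(hi) << 16); // zext high part, shl by LoWidth, or
    }

    int main() {
      const uint8_t bytes[3] = {0xEF, 0xCD, 0xAB};
      std::printf("%06x\n", load_i24(bytes));   // prints "abcdef" on a little-endian host
      return 0;
    }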
+ State.recordConverted(cast<Instruction>(BCHi), GEPHi, /*TakeName=*/false); + } + + Value *HiExt = IRB.CreateZExt(LoadHi, NewType, LoadHi->getName() + ".ext"); + Value *HiShift = IRB.CreateShl(HiExt, LoWidth, HiExt->getName() + ".sh"); + Value *Result = IRB.CreateOr(LoExt, HiShift); + + State.recordConverted(Inst, Result); + + return Result; +} + +static Value *splitStore(StoreInst *Inst, ConversionState &State) { + if (Inst->isVolatile() || Inst->isAtomic()) + report_fatal_error("Can't split volatile/atomic stores"); + if (cast<IntegerType>(Inst->getValueOperand()->getType())->getBitWidth() % 8 + != 0) + report_fatal_error("Stores must be a multiple of 8 bits"); + + Value *OrigPtr = State.getConverted(Inst->getPointerOperand()); + // OrigPtr is now a placeholder in recursive calls, and so has no name. + if (OrigPtr->getName().empty()) + OrigPtr->setName(Inst->getPointerOperand()->getName()); + Value *OrigVal = State.getConverted(Inst->getValueOperand()); + unsigned Width = cast<IntegerType>( + Inst->getValueOperand()->getType())->getBitWidth(); + unsigned LoWidth = Width; + + while (!isLegalSize(LoWidth)) LoWidth -= 8; + IntegerType *LoType = IntegerType::get(Inst->getContext(), LoWidth); + IntegerType *HiType = IntegerType::get(Inst->getContext(), Width - LoWidth); + IRBuilder<> IRB(Inst->getParent(), Inst); + + Value *BCLo = IRB.CreateBitCast( + OrigPtr, + LoType->getPointerTo(), + OrigPtr->getName() + ".loty"); + Value *LoTrunc = IRB.CreateTrunc( + OrigVal, LoType, OrigVal->getName() + ".lo"); + IRB.CreateAlignedStore(LoTrunc, BCLo, Inst->getAlignment()); + + Value *HiLShr = IRB.CreateLShr( + OrigVal, LoWidth, OrigVal->getName() + ".hi.sh"); + Value *GEPHi = IRB.CreateConstGEP1_32(BCLo, 1, OrigPtr->getName() + ".hi"); + Value *HiTrunc = IRB.CreateTrunc( + HiLShr, HiType, OrigVal->getName() + ".hi"); + Value *BCHi = IRB.CreateBitCast( + GEPHi, + HiType->getPointerTo(), + OrigPtr->getName() + ".hity"); + + Value *StoreHi = IRB.CreateStore(HiTrunc, BCHi); + + if (!isLegalSize(Width - LoWidth)) { + // HiTrunc is still illegal, and is redundant with the truncate in the + // recursive call, so just get rid of it. + State.recordConverted(cast<Instruction>(HiTrunc), HiLShr, + /*TakeName=*/false); + StoreHi = splitStore(cast<StoreInst>(StoreHi), State); + // BCHi was still illegal, and has been replaced with a placeholder in the + // recursive call. Since it is redundant with BCLo in the recursive call, + // just splice it out entirely. + State.recordConverted(cast<Instruction>(BCHi), GEPHi, /*TakeName=*/false); + } + State.recordConverted(Inst, StoreHi, /*TakeName=*/false); + return StoreHi; +} + +// Return a value with the bits of the operand above the size of the original +// type cleared. The operand is assumed to have been legalized already. +static Value *getClearUpper(Value *Operand, Type *OrigType, + Instruction *InsertPt) { + // If the operand is a constant, it will have been created by + // ConversionState.getConverted, which zero-extends by default. + if (isa<Constant>(Operand)) + return Operand; + return BinaryOperator::Create( + Instruction::And, + Operand, + ConstantInt::get( + getPromotedType(OrigType), + APInt::getLowBitsSet(getPromotedType(OrigType)->getIntegerBitWidth(), + OrigType->getIntegerBitWidth())), + Operand->getName() + ".clear", + InsertPt); +} + +// Return a value with the bits of the operand above the size of the original +// type equal to the sign bit of the original operand. The new operand is +// assumed to have been legalized already. 
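The shift trick spelled out in the next comment can be seen on ordinary integers; a sketch with an i12 value held in a 16-bit slot, relying on the arithmetic right shift that mainstream compilers provide for signed types:

    #include <cstdint>
    #include <cstdio>

    static int16_t signExtend12(uint16_t v) {               // v holds an i12, upper bits zero
      const int Shift = 16 - 12;
      int16_t Shifted = static_cast<int16_t>(v << Shift);   // shl: move the i12 sign bit to the MSB
      return static_cast<int16_t>(Shifted >> Shift);        // ashr: smear it back down
    }

    int main() {
      std::printf("%d\n", signExtend12(0x0FFF));  // prints -1 (0xFFF is -1 as an i12)
      return 0;
    }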
+// This is done by shifting the sign bit of the smaller value up to the MSB +// position in the larger size, and then arithmetic-shifting it back down. +static Value *getSignExtend(Value *Operand, Value *OrigOperand, + Instruction *InsertPt) { + // If OrigOperand was a constant, NewOperand will have been created by + // ConversionState.getConverted, which zero-extends by default. But that is + // wrong here, so replace it with a sign-extended constant. + if (Constant *C = dyn_cast<Constant>(OrigOperand)) + return convertConstant(C, /*SignExt=*/true); + Type *OrigType = OrigOperand->getType(); + ConstantInt *ShiftAmt = ConstantInt::getSigned( + cast<IntegerType>(getPromotedType(OrigType)), + getPromotedType(OrigType)->getIntegerBitWidth() - + OrigType->getIntegerBitWidth()); + BinaryOperator *Shl = BinaryOperator::Create( + Instruction::Shl, + Operand, + ShiftAmt, + Operand->getName() + ".getsign", + InsertPt); + return BinaryOperator::Create( + Instruction::AShr, + Shl, + ShiftAmt, + Operand->getName() + ".signed", + InsertPt); +} + +static void convertInstruction(Instruction *Inst, ConversionState &State) { + if (SExtInst *Sext = dyn_cast<SExtInst>(Inst)) { + Value *Op = Sext->getOperand(0); + Value *NewInst = NULL; + // If the operand to be extended is illegal, we first need to fill its + // upper bits (which are zero) with its sign bit. + if (shouldConvert(Op)) { + NewInst = getSignExtend(State.getConverted(Op), Op, Sext); + } + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. + if (getPromotedType(Op->getType()) != + getPromotedType(Sext->getType())) { + NewInst = new SExtInst( + NewInst ? NewInst : State.getConverted(Op), + getPromotedType(cast<IntegerType>(Sext->getType())), + Sext->getName() + ".sext", Sext); + } + // Now all the bits of the result are correct, but we need to restore + // the bits above its type to zero. + if (shouldConvert(Sext)) { + NewInst = getClearUpper(NewInst, Sext->getType(), Sext); + } + assert(NewInst && "Failed to convert sign extension"); + State.recordConverted(Sext, NewInst); + } else if (ZExtInst *Zext = dyn_cast<ZExtInst>(Inst)) { + Value *Op = Zext->getOperand(0); + Value *NewInst = NULL; + // TODO(dschuff): Some of these zexts could be no-ops. + if (shouldConvert(Op)) { + NewInst = getClearUpper(State.getConverted(Op), + Op->getType(), + Zext); + } + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. + if (getPromotedType(Op->getType()) != + getPromotedType(Zext->getType())) { + NewInst = CastInst::CreateZExtOrBitCast( + NewInst ? NewInst : State.getConverted(Op), + getPromotedType(cast<IntegerType>(Zext->getType())), + "", Zext); + } + assert(NewInst); + State.recordConverted(Zext, NewInst); + } else if (TruncInst *Trunc = dyn_cast<TruncInst>(Inst)) { + Value *Op = Trunc->getOperand(0); + Value *NewInst = NULL; + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. 
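For example, sign-extending an illegal i24 (assumed here to be carried in an i32 with its top 8 bits zero) to a legal i64 becomes, roughly:

    ; before
    %r = sext i24 %x to i64

    ; after: fill the top 8 bits with the sign bit, then sext the i32
    %x.getsign = shl i32 %x, 8
    %x.signed  = ashr i32 %x.getsign, 8
    %r         = sext i32 %x.signed to i64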
+ if (getPromotedType(Op->getType()) != + getPromotedType(Trunc->getType())) { + NewInst = new TruncInst( + State.getConverted(Op), + getPromotedType(cast<IntegerType>(Trunc->getType())), + State.getConverted(Op)->getName() + ".trunc", + Trunc); + } + // Restoring the upper-bits-are-zero invariant effectively truncates the + // value. + if (shouldConvert(Trunc)) { + NewInst = getClearUpper(NewInst ? NewInst : Op, + Trunc->getType(), + Trunc); + } + assert(NewInst); + State.recordConverted(Trunc, NewInst); + } else if (AllocaInst *Alloc = dyn_cast<AllocaInst>(Inst)) { + // Don't handle arrays of illegal types, but we could handle an array + // with size specified as an illegal type, as unlikely as that seems. + if (shouldConvert(Alloc) && Alloc->isArrayAllocation()) + report_fatal_error("Can't convert arrays of illegal type"); + AllocaInst *NewInst = new AllocaInst( + getPromotedType(Alloc->getAllocatedType()), + State.getConverted(Alloc->getArraySize()), + "", Alloc); + NewInst->setAlignment(Alloc->getAlignment()); + State.recordConverted(Alloc, NewInst); + } else if (BitCastInst *BCInst = dyn_cast<BitCastInst>(Inst)) { + // Only handle pointers. Ints can't be casted to/from other ints + Type *DestType = shouldConvert(BCInst) ? + getPromotedType(BCInst->getDestTy()) : BCInst->getDestTy(); + BitCastInst *NewInst = new BitCastInst( + State.getConverted(BCInst->getOperand(0)), + DestType, + "", BCInst); + State.recordConverted(BCInst, NewInst); + } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + if (shouldConvert(Load)) { + splitLoad(Load, State); + } + } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { + if (shouldConvert(Store->getValueOperand())) { + splitStore(Store, State); + } + } else if (isa<CallInst>(Inst)) { + report_fatal_error("can't convert calls with illegal types"); + } else if (BinaryOperator *Binop = dyn_cast<BinaryOperator>(Inst)) { + Value *NewInst = NULL; + if (Binop->getOpcode() == Instruction::AShr) { + // The AShr operand needs to be sign-extended to the promoted size + // before shifting. Because the sign-extension is implemented with + // with AShr, it can be combined with the original operation. + Value *Op = Binop->getOperand(0); + Value *ShiftAmount = NULL; + APInt SignShiftAmt = APInt( + getPromotedType(Op->getType())->getIntegerBitWidth(), + getPromotedType(Op->getType())->getIntegerBitWidth() - + Op->getType()->getIntegerBitWidth()); + NewInst = BinaryOperator::Create( + Instruction::Shl, + State.getConverted(Op), + ConstantInt::get(getPromotedType(Op->getType()), SignShiftAmt), + State.getConverted(Op)->getName() + ".getsign", + Binop); + if (ConstantInt *C = dyn_cast<ConstantInt>( + State.getConverted(Binop->getOperand(1)))) { + ShiftAmount = ConstantInt::get(getPromotedType(Op->getType()), + SignShiftAmt + C->getValue()); + } else { + ShiftAmount = BinaryOperator::Create( + Instruction::Add, + State.getConverted(Binop->getOperand(1)), + ConstantInt::get( + getPromotedType(Binop->getOperand(1)->getType()), + SignShiftAmt), + State.getConverted(Op)->getName() + ".shamt", Binop); + } + NewInst = BinaryOperator::Create( + Instruction::AShr, + NewInst, + ShiftAmount, + Binop->getName() + ".result", Binop); + } else { + // If the original operation is not AShr, just recreate it as usual. 
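As a sketch of the AShr case above, an arithmetic shift of an illegal i24 (again assuming promotion to i32 with the upper 8 bits kept zero) would be rewritten roughly as:

    ; before
    %r = ashr i24 %a, %b

    ; after: reposition the sign bit, fold the extra 8 into the shift amount,
    ; then re-establish the upper-bits-are-zero invariant
    %a.getsign = shl i32 %a, 8
    %a.shamt   = add i32 %b, 8
    %r.result  = ashr i32 %a.getsign, %a.shamt
    %r         = and i32 %r.result, 16777215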
+ NewInst = BinaryOperator::Create( + Binop->getOpcode(), + State.getConverted(Binop->getOperand(0)), + State.getConverted(Binop->getOperand(1)), + Binop->getName() + ".result", Binop); + if (isa<OverflowingBinaryOperator>(NewInst)) { + cast<BinaryOperator>(NewInst)->setHasNoUnsignedWrap + (Binop->hasNoUnsignedWrap()); + cast<BinaryOperator>(NewInst)->setHasNoSignedWrap( + Binop->hasNoSignedWrap()); + } + } + + // Now restore the invariant if necessary. + // This switch also sanity-checks the operation. + switch (Binop->getOpcode()) { + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::LShr: + // These won't change the upper bits. + break; + // These can change the upper bits, unless we are sure they never + // overflow. So clear them now. + case Instruction::Add: + case Instruction::Sub: + if (!(Binop->hasNoUnsignedWrap() && Binop->hasNoSignedWrap())) + NewInst = getClearUpper(NewInst, Binop->getType(), Binop); + break; + case Instruction::Shl: + if (!Binop->hasNoUnsignedWrap()) + NewInst = getClearUpper(NewInst, Binop->getType(), Binop); + break; + // We modified the upper bits ourselves when implementing AShr + case Instruction::AShr: + NewInst = getClearUpper(NewInst, Binop->getType(), Binop); + break; + // We should not see FP operators here. + // We don't handle mul/div. + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::BinaryOpsEnd: + errs() << *Inst << "\n"; + llvm_unreachable("Cannot handle binary operator"); + break; + } + + State.recordConverted(Binop, NewInst); + } else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) { + Value *Op0, *Op1; + // For signed compares, operands are sign-extended to their + // promoted type. For unsigned or equality compares, the comparison + // is equivalent with the larger type because they are already + // zero-extended. + if (Cmp->isSigned()) { + Op0 = getSignExtend(State.getConverted(Cmp->getOperand(0)), + Cmp->getOperand(0), + Cmp); + Op1 = getSignExtend(State.getConverted(Cmp->getOperand(1)), + Cmp->getOperand(1), + Cmp); + } else { + Op0 = State.getConverted(Cmp->getOperand(0)); + Op1 = State.getConverted(Cmp->getOperand(1)); + } + ICmpInst *NewInst = new ICmpInst( + Cmp, Cmp->getPredicate(), Op0, Op1, ""); + State.recordConverted(Cmp, NewInst); + } else if (SelectInst *Select = dyn_cast<SelectInst>(Inst)) { + SelectInst *NewInst = SelectInst::Create( + Select->getCondition(), + State.getConverted(Select->getTrueValue()), + State.getConverted(Select->getFalseValue()), + "", Select); + State.recordConverted(Select, NewInst); + } else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) { + PHINode *NewPhi = PHINode::Create( + getPromotedType(Phi->getType()), + Phi->getNumIncomingValues(), + "", Phi); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) { + NewPhi->addIncoming(State.getConverted(Phi->getIncomingValue(I)), + Phi->getIncomingBlock(I)); + } + State.recordConverted(Phi, NewPhi); + } else if (SwitchInst *Switch = dyn_cast<SwitchInst>(Inst)) { + SwitchInst *NewInst = SwitchInst::Create( + State.getConverted(Switch->getCondition()), + Switch->getDefaultDest(), + Switch->getNumCases(), + Switch); + for (SwitchInst::CaseIt I = Switch->case_begin(), + E = Switch->case_end(); + I != E; ++I) { + // Build a new case from the ranges that map to the successor BB. 
Each + // range consists of a high and low value which are typed, so the ranges + // must be rebuilt and a new case constructed from them. + IntegersSubset CaseRanges = I.getCaseValueEx(); + IntegersSubsetToBB CaseBuilder; + for (unsigned RI = 0, RE = CaseRanges.getNumItems(); RI < RE; ++RI) { + CaseBuilder.add( + IntItem::fromConstantInt(cast<ConstantInt>(convertConstant( + CaseRanges.getItem(RI).getLow().toConstantInt()))), + IntItem::fromConstantInt(cast<ConstantInt>(convertConstant( + CaseRanges.getItem(RI).getHigh().toConstantInt())))); + } + IntegersSubset Case = CaseBuilder.getCase(); + NewInst->addCase(Case, I.getCaseSuccessor()); + } + Switch->eraseFromParent(); + } else { + errs() << *Inst<<"\n"; + llvm_unreachable("unhandled instruction"); + } +} + +bool PromoteIntegers::runOnFunction(Function &F) { + // Don't support changing the function arguments. This should not be + // generated by clang. + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + Value *Arg = I; + if (shouldConvert(Arg)) { + errs() << "Function " << F.getName() << ": " << *Arg << "\n"; + llvm_unreachable("Function has illegal integer/pointer argument"); + } + } + + ConversionState State; + bool Modified = false; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (BasicBlock::iterator BBI = FI->begin(), BBE = FI->end(); BBI != BBE;) { + Instruction *Inst = BBI++; + // Only attempt to convert an instruction if its result or any of its + // operands are illegal. + bool ShouldConvert = shouldConvert(Inst); + for (User::op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) + ShouldConvert |= shouldConvert(cast<Value>(OI)); + + if (ShouldConvert) { + convertInstruction(Inst, State); + Modified = true; + } + } + } + State.eraseReplacedInstructions(); + return Modified; +} + +FunctionPass *llvm::createPromoteIntegersPass() { + return new PromoteIntegers(); +} diff --git a/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp new file mode 100644 index 0000000000..cee574296f --- /dev/null +++ b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp @@ -0,0 +1,622 @@ +//===- ReplacePtrsWithInts.cpp - Convert pointer values to integer values--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass strips out aggregate pointer types and replaces them with +// the integer type iPTR, which is i32 for PNaCl (though this pass +// will allow iPTR to be i64 if the DataLayout specifies 64-bit +// pointers). +// +// The pass converts IR to the following normal form: +// +// All inttoptr and ptrtoint instructions use the same integer size +// (iPTR), so they do not implicitly truncate or zero-extend. +// +// alloca always has the result type i8*. +// +// Pointer types only appear in the following instructions: +// * loads and stores: the pointer operand is a NormalizedPtr. +// * function calls: the function operand is a NormalizedPtr. +// * intrinsic calls: any pointer arguments are NormalizedPtrs. +// * alloca +// * bitcast and inttoptr: only used as part of a NormalizedPtr. +// * ptrtoint: the operand is an InherentPtr. +// +// Where an InherentPtr is defined as a pointer value that is: +// * an alloca; +// * a GlobalValue (a function or global variable); or +// * an intrinsic call. 
+// +// And a NormalizedPtr is defined as a pointer value that is: +// * an inttoptr instruction; +// * an InherentPtr; or +// * a bitcast of an InherentPtr. +// +// This pass currently strips out lifetime markers (that is, calls to +// the llvm.lifetime.start/end intrinsics) and invariant markers +// (calls to llvm.invariant.start/end). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass must recreate functions in + // order to change their argument and return types. + struct ReplacePtrsWithInts : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReplacePtrsWithInts() : ModulePass(ID) { + initializeReplacePtrsWithIntsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; + + // FunctionConverter stores the state for mapping old instructions + // (of pointer type) to converted instructions (of integer type) + // within a function, and provides methods for doing the conversion. + class FunctionConverter { + // Int type that pointer types are to be replaced with, typically i32. + Type *IntPtrType; + + struct RewrittenVal { + RewrittenVal(): Placeholder(NULL), NewIntVal(NULL) {} + Value *Placeholder; + Value *NewIntVal; + }; + // Maps from old values (of pointer type) to converted values (of + // IntPtrType type). + DenseMap<Value *, RewrittenVal> RewriteMap; + + public: + FunctionConverter(Type *IntPtrType) : IntPtrType(IntPtrType) {} + + // Returns the normalized version of the given type, converting + // pointer types to IntPtrType. + Type *convertType(Type *Ty); + // Returns the normalized version of the given function type by + // normalizing the function's argument types. + FunctionType *convertFuncType(FunctionType *FTy); + + // Records that 'To' is the normalized version of 'From'. If 'To' + // is not of pointer type, no type conversion is required, so this + // can take the short cut of replacing 'To' with 'From'. + void recordConverted(Value *From, Value *To); + void recordConvertedAndErase(Instruction *From, Value *To); + + // Returns Val with no-op casts (those that convert between + // IntPtrType and pointer types) stripped off. + Value *stripNoopCasts(Value *Val); + + // Returns the normalized version of the given value. + // + // If the conversion of Val has been deferred, this returns a + // placeholder object, which will later be replaceAllUsesWith'd to + // the final value. Since replaceAllUsesWith does not work on + // references by metadata nodes, this can be bypassed using + // BypassPlaceholder to get the real converted value, assuming it + // is available. + Value *convert(Value *Val, bool BypassPlaceholder = false); + // Returns the NormalizedPtr form of the given pointer value. + // Inserts conversion instructions at InsertPt. + Value *convertBackToPtr(Value *Val, Instruction *InsertPt); + // Returns the NormalizedPtr form of the given function pointer. + // Inserts conversion instructions at InsertPt. 
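Concretely, for a 32-bit pointer size the normal form described above looks roughly like this (a sketch; instruction names abbreviated):

    ; before
    define void @f(i32* %ptr, i32 %val) {
      store i32 %val, i32* %ptr
      ret void
    }

    ; after: the pointer argument becomes an i32, and the store address is
    ; rebuilt as an inttoptr (a NormalizedPtr)
    define void @f(i32 %ptr, i32 %val) {
      %ptr.asptr = inttoptr i32 %ptr to i32*
      store i32 %val, i32* %ptr.asptr
      ret void
    }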
+ Value *convertFunctionPtr(Value *Callee, Instruction *InsertPt); + // Converts an instruction without recreating it, by wrapping its + // operands and result. + void convertInPlace(Instruction *Inst); + + void eraseReplacedInstructions(); + + // List of instructions whose deletion has been deferred. + SmallVector<Instruction *, 20> ToErase; + }; +} + +Type *FunctionConverter::convertType(Type *Ty) { + if (Ty->isPointerTy()) + return IntPtrType; + return Ty; +} + +FunctionType *FunctionConverter::convertFuncType(FunctionType *FTy) { + SmallVector<Type *, 8> ArgTypes; + for (FunctionType::param_iterator ArgTy = FTy->param_begin(), + E = FTy->param_end(); ArgTy != E; ++ArgTy) { + ArgTypes.push_back(convertType(*ArgTy)); + } + return FunctionType::get(convertType(FTy->getReturnType()), ArgTypes, + FTy->isVarArg()); +} + +void FunctionConverter::recordConverted(Value *From, Value *To) { + if (!From->getType()->isPointerTy()) { + From->replaceAllUsesWith(To); + return; + } + RewrittenVal *RV = &RewriteMap[From]; + assert(!RV->NewIntVal); + RV->NewIntVal = To; +} + +void FunctionConverter::recordConvertedAndErase(Instruction *From, Value *To) { + recordConverted(From, To); + // There may still be references to this value, so defer deleting it. + ToErase.push_back(From); +} + +Value *FunctionConverter::stripNoopCasts(Value *Val) { + SmallPtrSet<Value *, 4> Visited; + for (;;) { + if (!Visited.insert(Val)) { + // It is possible to get a circular reference in unreachable + // basic blocks. Handle this case for completeness. + return UndefValue::get(Val->getType()); + } + if (CastInst *Cast = dyn_cast<CastInst>(Val)) { + Value *Src = Cast->getOperand(0); + if ((isa<BitCastInst>(Cast) && Cast->getType()->isPointerTy()) || + (isa<PtrToIntInst>(Cast) && Cast->getType() == IntPtrType) || + (isa<IntToPtrInst>(Cast) && Src->getType() == IntPtrType)) { + Val = Src; + continue; + } + } + return Val; + } +} + +Value *FunctionConverter::convert(Value *Val, bool BypassPlaceholder) { + Val = stripNoopCasts(Val); + if (!Val->getType()->isPointerTy()) + return Val; + if (Constant *C = dyn_cast<Constant>(Val)) + return ConstantExpr::getPtrToInt(C, IntPtrType); + RewrittenVal *RV = &RewriteMap[Val]; + if (BypassPlaceholder) { + assert(RV->NewIntVal); + return RV->NewIntVal; + } + if (!RV->Placeholder) + RV->Placeholder = new Argument(convertType(Val->getType())); + return RV->Placeholder; +} + +Value *FunctionConverter::convertBackToPtr(Value *Val, Instruction *InsertPt) { + Type *NewTy = + convertType(Val->getType()->getPointerElementType())->getPointerTo(); + return new IntToPtrInst(convert(Val), NewTy, "", InsertPt); +} + +Value *FunctionConverter::convertFunctionPtr(Value *Callee, + Instruction *InsertPt) { + FunctionType *FuncType = cast<FunctionType>( + Callee->getType()->getPointerElementType()); + return new IntToPtrInst(convert(Callee), + convertFuncType(FuncType)->getPointerTo(), + "", InsertPt); +} + +static bool ShouldLeaveAlone(Value *V) { + if (Function *F = dyn_cast<Function>(V)) + return F->isIntrinsic(); + if (isa<InlineAsm>(V)) + return true; + return false; +} + +void FunctionConverter::convertInPlace(Instruction *Inst) { + // Convert operands. + for (unsigned I = 0; I < Inst->getNumOperands(); ++I) { + Value *Arg = Inst->getOperand(I); + if (Arg->getType()->isPointerTy() && !ShouldLeaveAlone(Arg)) { + Value *Conv = convert(Arg); + Inst->setOperand(I, new IntToPtrInst(Conv, Arg->getType(), "", Inst)); + } + } + // Convert result. 
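For instance, an intrinsic call is converted in place: its pointer operands (which have become i32 values) are wrapped back into pointers and the call itself is kept. A sketch, with the .asptr names that the cleanup step attaches later:

    ; before operand conversion
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %n, i32 1, i1 false)

    ; after: %dst and %src are now i32 values, so they are wrapped back
    %dst.asptr = inttoptr i32 %dst to i8*
    %src.asptr = inttoptr i32 %src to i8*
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst.asptr, i8* %src.asptr, i32 %n, i32 1, i1 false)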
+ if (Inst->getType()->isPointerTy()) { + Instruction *Cast = new PtrToIntInst( + Inst, convertType(Inst->getType()), Inst->getName() + ".asint"); + Cast->insertAfter(Inst); + recordConverted(Inst, Cast); + } +} + +void FunctionConverter::eraseReplacedInstructions() { + bool Error = false; + for (DenseMap<Value *, RewrittenVal>::iterator I = RewriteMap.begin(), + E = RewriteMap.end(); I != E; ++I) { + if (I->second.Placeholder) { + if (I->second.NewIntVal) { + I->second.Placeholder->replaceAllUsesWith(I->second.NewIntVal); + } else { + errs() << "Not converted: " << *I->first << "\n"; + Error = true; + } + } + } + if (Error) + report_fatal_error("Case not handled in ReplacePtrsWithInts"); + + // Delete the placeholders in a separate pass. This means that if + // one placeholder is accidentally rewritten to another, we will get + // a useful error message rather than accessing a dangling pointer. + for (DenseMap<Value *, RewrittenVal>::iterator I = RewriteMap.begin(), + E = RewriteMap.end(); I != E; ++I) { + delete I->second.Placeholder; + } + + // We must do dropAllReferences() before doing eraseFromParent(), + // otherwise we will try to erase instructions that are still + // referenced. + for (SmallVectorImpl<Instruction *>::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->dropAllReferences(); + } + for (SmallVectorImpl<Instruction *>::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } +} + +static void ConvertMetadataOperand(FunctionConverter *FC, + IntrinsicInst *Call, int Index) { + MDNode *MD = cast<MDNode>(Call->getArgOperand(Index)); + if (MD->getNumOperands() != 1) + return; + Value *MDArg = MD->getOperand(0); + if (MDArg && (isa<Argument>(MDArg) || isa<Instruction>(MDArg))) { + MDArg = FC->convert(MDArg, /* BypassPlaceholder= */ true); + if (PtrToIntInst *Cast = dyn_cast<PtrToIntInst>(MDArg)) { + // Unwrapping this is necessary for llvm.dbg.declare to work. + MDArg = Cast->getPointerOperand(); + } + SmallVector<Value *, 1> Args; + Args.push_back(MDArg); + Call->setArgOperand(Index, MDNode::get(Call->getContext(), Args)); + } +} + +// Remove attributes that only apply to pointer arguments. Returns +// the updated AttributeSet. +static AttributeSet RemovePointerAttrs(LLVMContext &Context, + AttributeSet Attrs) { + SmallVector<AttributeSet, 8> AttrList; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + unsigned Index = Attrs.getSlotIndex(Slot); + AttrBuilder AB; + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + switch (Attr->getKindAsEnum()) { + // ByVal and StructRet should already have been removed by the + // ExpandByVal pass. + case Attribute::ByVal: + case Attribute::StructRet: + case Attribute::Nest: + Attrs.dump(); + report_fatal_error("ReplacePtrsWithInts cannot handle " + "byval, sret or nest attrs"); + break; + // Strip NoCapture and NoAlias because they are only allowed + // on arguments of pointer type, and we are removing the + // pointer types. 
+ case Attribute::NoCapture: + case Attribute::NoAlias: + break; + default: + AB.addAttribute(*Attr); + } + } + AttrList.push_back(AttributeSet::get(Context, Index, AB)); + } + return AttributeSet::get(Context, AttrList); +} + +static void ConvertInstruction(DataLayout *DL, Type *IntPtrType, + FunctionConverter *FC, Instruction *Inst) { + if (ReturnInst *Ret = dyn_cast<ReturnInst>(Inst)) { + Value *Result = Ret->getReturnValue(); + if (Result) + Result = FC->convert(Result); + CopyDebug(ReturnInst::Create(Ret->getContext(), Result, Ret), Inst); + Ret->eraseFromParent(); + } else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) { + PHINode *Phi2 = PHINode::Create(FC->convertType(Phi->getType()), + Phi->getNumIncomingValues(), + "", Phi); + CopyDebug(Phi2, Phi); + for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { + Phi2->addIncoming(FC->convert(Phi->getIncomingValue(I)), + Phi->getIncomingBlock(I)); + } + Phi2->takeName(Phi); + FC->recordConvertedAndErase(Phi, Phi2); + } else if (SelectInst *Op = dyn_cast<SelectInst>(Inst)) { + Instruction *Op2 = SelectInst::Create(Op->getCondition(), + FC->convert(Op->getTrueValue()), + FC->convert(Op->getFalseValue()), + "", Op); + CopyDebug(Op2, Op); + Op2->takeName(Op); + FC->recordConvertedAndErase(Op, Op2); + } else if (isa<PtrToIntInst>(Inst) || isa<IntToPtrInst>(Inst)) { + Value *Arg = FC->convert(Inst->getOperand(0)); + Type *ResultTy = FC->convertType(Inst->getType()); + unsigned ArgSize = Arg->getType()->getIntegerBitWidth(); + unsigned ResultSize = ResultTy->getIntegerBitWidth(); + Value *Result; + // We avoid using IRBuilder's CreateZExtOrTrunc() here because it + // constant-folds ptrtoint ConstantExprs. This leads to creating + // ptrtoints of non-IntPtrType type, which is not what we want, + // because we want truncation/extension to be done explicitly by + // separate instructions. + if (ArgSize == ResultSize) { + Result = Arg; + } else { + Instruction::CastOps CastType = + ArgSize > ResultSize ? 
Instruction::Trunc : Instruction::ZExt; + Result = CopyDebug(CastInst::Create(CastType, Arg, ResultTy, "", Inst), + Inst); + } + if (Result != Arg) + Result->takeName(Inst); + FC->recordConvertedAndErase(Inst, Result); + } else if (isa<BitCastInst>(Inst)) { + if (Inst->getType()->isPointerTy()) { + FC->ToErase.push_back(Inst); + } + } else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Inst)) { + Value *Cmp2 = CopyDebug(new ICmpInst(Inst, Cmp->getPredicate(), + FC->convert(Cmp->getOperand(0)), + FC->convert(Cmp->getOperand(1)), ""), + Inst); + Cmp2->takeName(Cmp); + Cmp->replaceAllUsesWith(Cmp2); + Cmp->eraseFromParent(); + } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + Value *Ptr = FC->convertBackToPtr(Load->getPointerOperand(), Inst); + LoadInst *Result = new LoadInst(Ptr, "", Inst); + Result->takeName(Inst); + CopyDebug(Result, Inst); + CopyLoadOrStoreAttrs(Result, Load); + FC->recordConvertedAndErase(Inst, Result); + } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { + Value *Ptr = FC->convertBackToPtr(Store->getPointerOperand(), Inst); + StoreInst *Result = new StoreInst(FC->convert(Store->getValueOperand()), + Ptr, Inst); + CopyDebug(Result, Inst); + CopyLoadOrStoreAttrs(Result, Store); + Inst->eraseFromParent(); + } else if (CallInst *Call = dyn_cast<CallInst>(Inst)) { + if (IntrinsicInst *ICall = dyn_cast<IntrinsicInst>(Inst)) { + if (ICall->getIntrinsicID() == Intrinsic::lifetime_start || + ICall->getIntrinsicID() == Intrinsic::lifetime_end || + ICall->getIntrinsicID() == Intrinsic::invariant_start || + ICall->getIntrinsicID() == Intrinsic::invariant_end) { + // Remove alloca lifetime markers for now. This is because + // the GVN pass can introduce lifetime markers taking PHI + // nodes as arguments. If ReplacePtrsWithInts converts the + // PHI node to int type, we will render those lifetime markers + // ineffective. But dropping a subset of lifetime markers is + // not safe in general. So, until LLVM better defines the + // semantics of lifetime markers, we drop them all. See: + // https://code.google.com/p/nativeclient/issues/detail?id=3443 + // We do the same for invariant.start/end because they work in + // a similar way. 
+ Inst->eraseFromParent(); + } else { + FC->convertInPlace(Inst); + } + } else if (isa<InlineAsm>(Call->getCalledValue())) { + FC->convertInPlace(Inst); + } else { + SmallVector<Value *, 10> Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) + Args.push_back(FC->convert(Call->getArgOperand(I))); + CallInst *NewCall = CallInst::Create( + FC->convertFunctionPtr(Call->getCalledValue(), Call), + Args, "", Inst); + CopyDebug(NewCall, Call); + NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), + Call->getAttributes())); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->takeName(Call); + FC->recordConvertedAndErase(Call, NewCall); + } + } else if (InvokeInst *Call = dyn_cast<InvokeInst>(Inst)) { + SmallVector<Value *, 10> Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) + Args.push_back(FC->convert(Call->getArgOperand(I))); + InvokeInst *NewCall = InvokeInst::Create( + FC->convertFunctionPtr(Call->getCalledValue(), Call), + Call->getNormalDest(), + Call->getUnwindDest(), + Args, "", Inst); + CopyDebug(NewCall, Call); + NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), + Call->getAttributes())); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->takeName(Call); + FC->recordConvertedAndErase(Call, NewCall); + } else if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Inst)) { + Type *ElementTy = Inst->getType()->getPointerElementType(); + Constant *ElementSize = ConstantInt::get(IntPtrType, + DL->getTypeAllocSize(ElementTy)); + // Expand out alloca's built-in multiplication. + Value *MulSize; + if (ConstantInt *C = dyn_cast<ConstantInt>(Alloca->getArraySize())) { + MulSize = ConstantExpr::getMul(ElementSize, C); + } else { + MulSize = BinaryOperator::Create( + Instruction::Mul, ElementSize, Alloca->getArraySize(), + Alloca->getName() + ".alloca_mul", Alloca); + } + unsigned Alignment = Alloca->getAlignment(); + if (Alignment == 0) + Alignment = DL->getPrefTypeAlignment(ElementTy); + Value *Tmp = CopyDebug(new AllocaInst(Type::getInt8Ty(Inst->getContext()), + MulSize, Alignment, "", Inst), + Inst); + Tmp->takeName(Alloca); + Value *Alloca2 = new PtrToIntInst(Tmp, IntPtrType, + Tmp->getName() + ".asint", Inst); + FC->recordConvertedAndErase(Alloca, Alloca2); + } else if (// These atomics only operate on integer pointers, not + // other pointers, so we don't need to recreate the + // instruction. + isa<AtomicCmpXchgInst>(Inst) || + isa<AtomicRMWInst>(Inst) || + // Handle these instructions as a convenience to allow + // the pass to be used in more situations, even though we + // don't expect them in PNaCl's stable ABI. 
+ isa<GetElementPtrInst>(Inst) || + isa<VAArgInst>(Inst) || + isa<IndirectBrInst>(Inst) || + isa<ExtractValueInst>(Inst) || + isa<InsertValueInst>(Inst)) { + FC->convertInPlace(Inst); + } +} + +// Convert ptrtoint+inttoptr to a bitcast because it's shorter and +// because some intrinsics work on bitcasts but not on +// ptrtoint+inttoptr, in particular: +// * llvm.lifetime.start/end (although we strip these out) +// * llvm.eh.typeid.for +static void SimplifyCasts(Instruction *Inst, Type *IntPtrType) { + if (IntToPtrInst *Cast1 = dyn_cast<IntToPtrInst>(Inst)) { + if (PtrToIntInst *Cast2 = dyn_cast<PtrToIntInst>(Cast1->getOperand(0))) { + assert(Cast2->getType() == IntPtrType); + Value *V = Cast2->getPointerOperand(); + if (V->getType() != Cast1->getType()) + V = new BitCastInst(V, Cast1->getType(), V->getName() + ".bc", Cast1); + Cast1->replaceAllUsesWith(V); + if (Cast1->use_empty()) + Cast1->eraseFromParent(); + if (Cast2->use_empty()) + Cast2->eraseFromParent(); + } + } +} + +static void CleanUpFunction(Function *Func, Type *IntPtrType) { + // Remove the ptrtoint/bitcast ConstantExprs we introduced for + // referencing globals. + FunctionPass *Pass = createExpandConstantExprPass(); + Pass->runOnFunction(*Func); + delete Pass; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + SimplifyCasts(Iter++, IntPtrType); + } + } + // Cleanup pass. + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + // Add names to inttoptrs to make the output more readable. The + // placeholder values get in the way of doing this earlier when + // the inttoptrs are created. + if (isa<IntToPtrInst>(Inst)) + Inst->setName(Inst->getOperand(0)->getName() + ".asptr"); + // Remove ptrtoints that were introduced for allocas but not used. + if (isa<PtrToIntInst>(Inst) && Inst->use_empty()) + Inst->eraseFromParent(); + } + } +} + +char ReplacePtrsWithInts::ID = 0; +INITIALIZE_PASS(ReplacePtrsWithInts, "replace-ptrs-with-ints", + "Convert pointer values to integer values", + false, false) + +bool ReplacePtrsWithInts::runOnModule(Module &M) { + DataLayout DL(&M); + Type *IntPtrType = DL.getIntPtrType(M.getContext()); + + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *OldFunc = Iter++; + // Intrinsics' types must be left alone. + if (OldFunc->isIntrinsic()) + continue; + + FunctionConverter FC(IntPtrType); + FunctionType *NFTy = FC.convertFuncType(OldFunc->getFunctionType()); + OldFunc->setAttributes(RemovePointerAttrs(M.getContext(), + OldFunc->getAttributes())); + Function *NewFunc = RecreateFunction(OldFunc, NFTy); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = OldFunc->arg_begin(), + E = OldFunc->arg_end(), NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + FC.recordConverted(Arg, NewArg); + NewArg->takeName(Arg); + } + + // Convert the function body. + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + ConvertInstruction(&DL, IntPtrType, &FC, Iter++); + } + } + // Now that all the replacement instructions have been created, we + // can update the debug intrinsic calls. 
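As a rough sketch (names added for readability), the cast simplification plus the unused-ptrtoint removal collapse the post-conversion pattern around an alloca into a plain bitcast:

    ; before cleanup
    %buf       = alloca i8, i32 4
    %buf.asint = ptrtoint i8* %buf to i32
    %ptr       = inttoptr i32 %buf.asint to i32*
    %val       = load i32* %ptr

    ; after cleanup
    %buf    = alloca i8, i32 4
    %buf.bc = bitcast i8* %buf to i32*
    %val    = load i32* %buf.bc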
+ for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + if (IntrinsicInst *Call = dyn_cast<IntrinsicInst>(Inst)) { + if (Call->getIntrinsicID() == Intrinsic::dbg_declare) { + ConvertMetadataOperand(&FC, Call, 0); + } + } + } + } + FC.eraseReplacedInstructions(); + OldFunc->eraseFromParent(); + } + // Now that all functions have their normalized types, we can remove + // various casts. + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + CleanUpFunction(Func, IntPtrType); + // Delete the now-unused bitcast ConstantExprs that we created so + // that they don't interfere with StripDeadPrototypes. + Func->removeDeadConstantUsers(); + } + return true; +} + +ModulePass *llvm::createReplacePtrsWithIntsPass() { + return new ReplacePtrsWithInts(); +} diff --git a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp new file mode 100644 index 0000000000..e4efeb67c3 --- /dev/null +++ b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp @@ -0,0 +1,100 @@ +//===- ResolvePNaClIntrinsics.cpp - Resolve calls to PNaCl intrinsics ----====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass resolves calls to PNaCl stable bitcode intrinsics. It is +// normally run in the PNaCl translator. +// +// Running AddPNaClExternalDeclsPass is a precondition for running this pass. +// They are separate because one is a ModulePass and the other is a +// FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ResolvePNaClIntrinsics : public FunctionPass { + public: + ResolvePNaClIntrinsics() : FunctionPass(ID) { + initializeResolvePNaClIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + static char ID; + virtual bool runOnFunction(Function &F); + private: + // Some intrinsic calls are resolved simply by replacing the call with a + // call to an alternative function with exactly the same type. + bool resolveSimpleCall(Function &F, Intrinsic::ID IntrinsicID, + const char *TargetFunctionName); + }; +} + +bool ResolvePNaClIntrinsics::resolveSimpleCall(Function &F, + Intrinsic::ID IntrinsicID, + const char *TargetFunctionName) { + Module *M = F.getParent(); + bool Changed = false; + Function *IntrinsicFunction = Intrinsic::getDeclaration(M, IntrinsicID); + + if (!IntrinsicFunction) { + return false; + } + + // Expect to find the target function for this intrinsic already declared + Function *TargetFunction = M->getFunction(TargetFunctionName); + if (!TargetFunction) { + report_fatal_error( + std::string("Expected to find external declaration of ") + + TargetFunctionName); + } + + for (Value::use_iterator UI = IntrinsicFunction->use_begin(), + UE = IntrinsicFunction->use_end(); UI != UE;) { + // At this point, the only uses of the intrinsic can be calls, since + // we assume this pass runs on bitcode that passed ABI verification. 
+ CallInst *Call = dyn_cast<CallInst>(*UI++); + + if (!Call) { + report_fatal_error( + std::string("Expected use of intrinsic to be a call: ") + + Intrinsic::getName(IntrinsicID)); + } + + // To be a well-behaving FunctionPass, don't touch uses in other + // functions. These will be handled when the pass manager gets to those + // functions. + if (Call->getParent()->getParent() == &F) { + Call->setCalledFunction(TargetFunction); + Changed = true; + } + } + + return Changed; +} + +bool ResolvePNaClIntrinsics::runOnFunction(Function &F) { + bool Changed = resolveSimpleCall(F, Intrinsic::nacl_setjmp, "setjmp"); + Changed |= resolveSimpleCall(F, Intrinsic::nacl_longjmp, "longjmp"); + return Changed; +} + +char ResolvePNaClIntrinsics::ID = 0; +INITIALIZE_PASS(ResolvePNaClIntrinsics, "resolve-pnacl-intrinsics", + "Resolve PNaCl intrinsic calls", false, false) + +FunctionPass *llvm::createResolvePNaClIntrinsicsPass() { + return new ResolvePNaClIntrinsics(); +} diff --git a/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp new file mode 100644 index 0000000000..17cd2347cc --- /dev/null +++ b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp @@ -0,0 +1,71 @@ +//===- RewriteLLVMIntrinsics.cpp - Rewrite LLVM intrinsics to other values ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces calls to LLVM intrinsics that are *not* part of the +// PNaCl stable bitcode ABI into simpler values. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class RewriteLLVMIntrinsics : public ModulePass { + public: + static char ID; + RewriteLLVMIntrinsics() : ModulePass(ID) { + // This is a module pass because this makes it easier to access uses + // of global intrinsic functions. + initializeRewriteLLVMIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char RewriteLLVMIntrinsics::ID = 0; +INITIALIZE_PASS(RewriteLLVMIntrinsics, "rewrite-llvm-intrinsic-calls", + "Rewrite LLVM intrinsic calls to simpler expressions", + false, false) + +bool RewriteLLVMIntrinsics::runOnModule(Module &M) { + bool Changed = false; + + // Iterate over all uses of the llvm.flt.rounds, and replace it with + // the constant "1" (round-to-nearest). Until we add a second intrinsic + // like llvm.set.flt.round it is impossible to have a rounding mode + // that is not the initial rounding mode (round-to-nearest). + // We can remove this rewrite after adding a set() intrinsic. 
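The effect on a pexe is simply constant folding of the rounding-mode query, for example:

    ; before
    %mode      = call i32 @llvm.flt.rounds()
    %tonearest = icmp eq i32 %mode, 1

    ; after: the call is erased and its uses see the constant 1
    %tonearest = icmp eq i32 1, 1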
+ Function *FltRounds = Intrinsic::getDeclaration(&M, Intrinsic::flt_rounds); + Type *RetType = FltRounds->getFunctionType()->getReturnType(); + for (Value::use_iterator UI = FltRounds->use_begin(), + UE = FltRounds->use_end(); UI != UE;) { + Value *Use = *UI++; + if (CallInst *Call = dyn_cast<CallInst>(Use)) { + Constant *C = ConstantInt::get(RetType, 1); + Call->replaceAllUsesWith(C); + Call->eraseFromParent(); + Changed = true; + } else { + report_fatal_error("Taking the address of llvm.flt.rounds is invalid"); + } + } + FltRounds->eraseFromParent(); + + return Changed; +} + +ModulePass *llvm::createRewriteLLVMIntrinsicsPass() { + return new RewriteLLVMIntrinsics(); +} diff --git a/lib/Transforms/NaCl/RewritePNaClLibraryCalls.cpp b/lib/Transforms/NaCl/RewritePNaClLibraryCalls.cpp new file mode 100644 index 0000000000..a01e8c6b2a --- /dev/null +++ b/lib/Transforms/NaCl/RewritePNaClLibraryCalls.cpp @@ -0,0 +1,537 @@ +//===- RewritePNaClLibraryCalls.cpp - PNaCl library calls to intrinsics ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces calls to known library functions with calls to intrinsics +// that are part of the PNaCl stable bitcode ABI. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include <cstdarg> + +using namespace llvm; + +namespace { + class RewritePNaClLibraryCalls : public ModulePass { + public: + static char ID; + RewritePNaClLibraryCalls() : + ModulePass(ID), TheModule(NULL), Context(NULL), SetjmpIntrinsic(NULL), + LongjmpIntrinsic(NULL), MemcpyIntrinsic(NULL), + MemmoveIntrinsic(NULL), MemsetIntrinsic(NULL) { + // This is a module pass because it may have to introduce + // intrinsic declarations into the module and modify globals. + initializeRewritePNaClLibraryCallsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + private: + typedef void (RewritePNaClLibraryCalls::*RewriteCallFunc)(CallInst *); + typedef void (RewritePNaClLibraryCalls::*PopulateWrapperFunc)(Function *); + + /// Handles a certain pattern of library function -> intrinsic rewrites. + /// Currently all library functions this pass knows how to rewrite fall into + /// this pattern. + /// RewriteLibraryCall performs the rewrite for a single library function + /// and is customized by its arguments. + /// + /// \p LibraryFunctionName Name of the library function to look for. + /// \p CorrectFunctionType is the correct type of this library function. + /// \p CallRewriter Method that rewrites the library function call into an + /// intrinsic call. + /// \p OnlyCallsAllowed Only calls to this library function are allowed. + /// \p WrapperPopulator called to populate the body of the library function + /// with a wrapped intrinsic call. + bool RewriteLibraryCall( + const char *LibraryFunctionName, + FunctionType *CorrectFunctionType, + RewriteCallFunc CallRewriter, + bool OnlyCallsAllowed, + PopulateWrapperFunc WrapperPopulator); + + /// Two function types are compatible if they have compatible return types + /// and the same number of compatible parameters. 
Return types and + /// parameters are compatible if they are exactly the same type or both are + /// pointer types. + static bool compatibleFunctionTypes(FunctionType *FTy1, FunctionType *FTy2); + static bool compatibleParamOrRetTypes(Type *Ty1, Type *Ty2); + + void rewriteSetjmpCall(CallInst *Call); + void rewriteLongjmpCall(CallInst *Call); + void rewriteMemcpyCall(CallInst *Call); + void rewriteMemmoveCall(CallInst *Call); + void rewriteMemsetCall(CallInst *Call); + + void populateSetjmpWrapper(Function *SetjmpFunc); + void populateLongjmpWrapper(Function *LongjmpFunc); + void populateMemcpyWrapper(Function *MemcpyFunc); + void populateMemmoveWrapper(Function *MemmoveFunc); + void populateMemsetWrapper(Function *MemsetFunc); + + /// Generic implementation of populating a wrapper function. + /// Initially, the function exists in the module as a declaration with + /// unnamed arguments. This method is called with a NULL-terminated list + /// of argument names that get assigned in the generated IR for + /// readability. + void populateWrapperCommon( + Function *Func, + StringRef FuncName, + RewriteCallFunc CallRewriter, + bool CallCannotReturn, + ...); + + /// Find and cache known intrinsics. + Function *findSetjmpIntrinsic(); + Function *findLongjmpIntrinsic(); + Function *findMemcpyIntrinsic(); + Function *findMemmoveIntrinsic(); + Function *findMemsetIntrinsic(); + + /// Cached data that remains the same throughout a module run. + Module *TheModule; + LLVMContext *Context; + + /// These are cached but computed lazily. + Function *SetjmpIntrinsic; + Function *LongjmpIntrinsic; + Function *MemcpyIntrinsic; + Function *MemmoveIntrinsic; + Function *MemsetIntrinsic; + }; +} + +char RewritePNaClLibraryCalls::ID = 0; +INITIALIZE_PASS(RewritePNaClLibraryCalls, "rewrite-pnacl-library-calls", + "Rewrite PNaCl library calls to stable intrinsics", + false, false) + +bool RewritePNaClLibraryCalls::RewriteLibraryCall( + const char *LibraryFunctionName, + FunctionType *CorrectFunctionType, + RewriteCallFunc CallRewriter, + bool OnlyCallsAllowed, + PopulateWrapperFunc WrapperPopulator) { + bool Changed = false; + + Function *LibFunc = TheModule->getFunction(LibraryFunctionName); + + // Iterate over all uses of this function, if it exists in the module with + // external linkage. If it exists but the linkage is not external, this may + // come from code that defines its own private function with the same name + // and doesn't actually include the standard libc header declaring it. + // In such a case we leave the code as it is. + // + // Another case we need to handle here is this function having the wrong + // prototype (incompatible with the C library function prototype, and hence + // incompatible with the intrinsic). In general, this is undefined behavior, + // but we can't fail compilation because some workflows rely on it + // compiling correctly (for example, autoconf). The solution is: + // When the declared type of the function in the module is not correct, we + // re-create the function with the correct prototype and replace all calls + // to this new function (casted to the old function type). Effectively this + // delays the undefined behavior until run-time. + if (LibFunc && LibFunc->hasExternalLinkage()) { + if (!compatibleFunctionTypes(LibFunc->getFunctionType(), + CorrectFunctionType)) { + // Use the RecreateFunction utility to create a new function with the + // correct prototype. RecreateFunction also RAUWs the function with + // proper bitcasts. 
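For instance, a module that (incorrectly) declared memcpy with a 64-bit length would, as a sketch, be handled like this:

    ; original declaration and call
    declare i8* @memcpy(i8*, i8*, i64)
    %r = call i8* @memcpy(i8* %d, i8* %s, i64 %n)

    ; after recreation: the declaration gets the expected prototype, and the
    ; old call reaches it through a bitcast, deferring the mismatch to run time
    declare i8* @memcpy(i8*, i8*, i32)
    %r = call i8* bitcast (i8* (i8*, i8*, i32)* @memcpy to i8* (i8*, i8*, i64)*)(i8* %d, i8* %s, i64 %n)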
+ // + // One interesting case that may arise is when the original module had + // calls to both a correct and an incorrect version of the library + // function. Depending on the linking order, either version could be + // selected as the global declaration in the module, so even valid calls + // could end up being bitcast-ed from the incorrect to the correct + // function type. The RecreateFunction call below will eliminate such + // bitcasts (because the new type matches the call type), but dead + // constant expressions may be left behind. + // These are cleaned up with removeDeadConstantUsers. + Function *NewFunc = RecreateFunction(LibFunc, CorrectFunctionType); + LibFunc->eraseFromParent(); + NewFunc->setLinkage(Function::InternalLinkage); + Changed = true; + NewFunc->removeDeadConstantUsers(); + LibFunc = NewFunc; + } + + // Handle all uses that are calls. These are simply replaced with + // equivalent intrinsic calls. + for (Value::use_iterator UI = LibFunc->use_begin(), + UE = LibFunc->use_end(); UI != UE;) { + Value *Use = *UI++; + if (CallInst *Call = dyn_cast<CallInst>(Use)) { + (this->*(CallRewriter))(Call); + Changed = true; + } + } + + if (LibFunc->use_empty()) { + LibFunc->eraseFromParent(); + } else if (OnlyCallsAllowed) { + // If additional uses remain, these aren't calls. + report_fatal_error(Twine("Taking the address of ") + + LibraryFunctionName + " is invalid"); + } else { + // If non-call uses remain and allowed for this function, populate it + // with a wrapper. + (this->*(WrapperPopulator))(LibFunc); + LibFunc->setLinkage(Function::InternalLinkage); + Changed = true; + } + } + + return Changed; +} + +bool RewritePNaClLibraryCalls::runOnModule(Module &M) { + TheModule = &M; + Context = &TheModule->getContext(); + bool Changed = false; + + Type *Int8PtrTy = Type::getInt8PtrTy(*Context); + Type *Int64PtrTy = Type::getInt64PtrTy(*Context); + Type *Int32Ty = Type::getInt32Ty(*Context); + Type *VoidTy = Type::getVoidTy(*Context); + + Type *SetjmpParams[] = { Int64PtrTy }; + FunctionType *SetjmpFunctionType = FunctionType::get(Int32Ty, SetjmpParams, + false); + Changed |= RewriteLibraryCall( + "setjmp", + SetjmpFunctionType, + &RewritePNaClLibraryCalls::rewriteSetjmpCall, + true, + &RewritePNaClLibraryCalls::populateSetjmpWrapper); + + Type *LongjmpParams[] = { Int64PtrTy, Int32Ty }; + FunctionType *LongjmpFunctionType = FunctionType::get(VoidTy, LongjmpParams, + false); + Changed |= RewriteLibraryCall( + "longjmp", + LongjmpFunctionType, + &RewritePNaClLibraryCalls::rewriteLongjmpCall, + false, + &RewritePNaClLibraryCalls::populateLongjmpWrapper); + + Type *MemsetParams[] = { Int8PtrTy, Int32Ty, Int32Ty }; + FunctionType *MemsetFunctionType = FunctionType::get(Int8PtrTy, MemsetParams, + false); + Changed |= RewriteLibraryCall( + "memset", + MemsetFunctionType, + &RewritePNaClLibraryCalls::rewriteMemsetCall, + false, + &RewritePNaClLibraryCalls::populateMemsetWrapper); + + Type *MemcpyParams[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; + FunctionType *MemcpyFunctionType = FunctionType::get(Int8PtrTy, MemcpyParams, + false); + Changed |= RewriteLibraryCall( + "memcpy", + MemcpyFunctionType, + &RewritePNaClLibraryCalls::rewriteMemcpyCall, + false, + &RewritePNaClLibraryCalls::populateMemcpyWrapper); + + Type *MemmoveParams[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; + FunctionType *MemmoveFunctionType = FunctionType::get(Int8PtrTy, + MemmoveParams, + false); + Changed |= RewriteLibraryCall( + "memmove", + MemmoveFunctionType, + &RewritePNaClLibraryCalls::rewriteMemmoveCall, + false, + 
&RewritePNaClLibraryCalls::populateMemmoveWrapper); + + return Changed; +} + +bool RewritePNaClLibraryCalls::compatibleFunctionTypes(FunctionType *FTy1, + FunctionType *FTy2) { + if (FTy1->getNumParams() != FTy2->getNumParams()) { + return false; + } + + if (!compatibleParamOrRetTypes(FTy1->getReturnType(), + FTy2->getReturnType())) { + return false; + } + + for (unsigned I = 0, End = FTy1->getNumParams(); I != End; ++I) { + if (!compatibleParamOrRetTypes(FTy1->getParamType(I), + FTy2->getParamType(I))) { + return false; + } + } + + return true; +} + +bool RewritePNaClLibraryCalls::compatibleParamOrRetTypes(Type *Ty1, + Type *Ty2) { + return (Ty1 == Ty2 || (Ty1->isPointerTy() && Ty2->isPointerTy())); +} + +void RewritePNaClLibraryCalls::rewriteSetjmpCall(CallInst *Call) { + // Find the intrinsic function. + Function *NaClSetjmpFunc = findSetjmpIntrinsic(); + // Cast the jmp_buf argument to the type NaClSetjmpCall expects. + Type *PtrTy = NaClSetjmpFunc->getFunctionType()->getParamType(0); + BitCastInst *JmpBufCast = new BitCastInst(Call->getArgOperand(0), PtrTy, + "jmp_buf_i8", Call); + const DebugLoc &DLoc = Call->getDebugLoc(); + JmpBufCast->setDebugLoc(DLoc); + + // Emit the updated call. + Value *Args[] = { JmpBufCast }; + CallInst *NaClSetjmpCall = CallInst::Create(NaClSetjmpFunc, Args, "", Call); + NaClSetjmpCall->setDebugLoc(DLoc); + NaClSetjmpCall->takeName(Call); + + // Replace the original call. + Call->replaceAllUsesWith(NaClSetjmpCall); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteLongjmpCall(CallInst *Call) { + // Find the intrinsic function. + Function *NaClLongjmpFunc = findLongjmpIntrinsic(); + // Cast the jmp_buf argument to the type NaClLongjmpCall expects. + Type *PtrTy = NaClLongjmpFunc->getFunctionType()->getParamType(0); + BitCastInst *JmpBufCast = new BitCastInst(Call->getArgOperand(0), PtrTy, + "jmp_buf_i8", Call); + const DebugLoc &DLoc = Call->getDebugLoc(); + JmpBufCast->setDebugLoc(DLoc); + + // Emit the call. + Value *Args[] = { JmpBufCast, Call->getArgOperand(1) }; + CallInst *NaClLongjmpCall = CallInst::Create(NaClLongjmpFunc, Args, "", Call); + NaClLongjmpCall->setDebugLoc(DLoc); + // No takeName here since longjmp is a void call that does not get assigned to + // a value. + + // Remove the original call. There's no need for RAUW because longjmp + // returns void. + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteMemcpyCall(CallInst *Call) { + Function *MemcpyIntrinsic = findMemcpyIntrinsic(); + // dest, src, len, align, isvolatile + Value *Args[] = { Call->getArgOperand(0), + Call->getArgOperand(1), + Call->getArgOperand(2), + ConstantInt::get(Type::getInt32Ty(*Context), 1), + ConstantInt::get(Type::getInt1Ty(*Context), 0) }; + CallInst *MemcpyIntrinsicCall = CallInst::Create(MemcpyIntrinsic, + Args, "", Call); + MemcpyIntrinsicCall->setDebugLoc(Call->getDebugLoc()); + + // libc memcpy returns the source pointer, but the LLVM intrinsic doesn't; if + // the return value has actual uses, just replace them with the dest + // argument itself. 
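Putting the memcpy rewrite together: the libc-style call becomes the intrinsic call, and since libc memcpy returns the destination pointer, any remaining uses of the result are redirected to the dest argument (the exact intrinsic name depends on the pointer and length types):

    ; before
    %r = call i8* @memcpy(i8* %dst, i8* %src, i32 %n)
    ; ... uses of %r ...

    ; after
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %n, i32 1, i1 false)
    ; ... uses of %r now refer to %dst ...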
+ Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteMemmoveCall(CallInst *Call) { + Function *MemmoveIntrinsic = findMemmoveIntrinsic(); + // dest, src, len, align, isvolatile + Value *Args[] = { Call->getArgOperand(0), + Call->getArgOperand(1), + Call->getArgOperand(2), + ConstantInt::get(Type::getInt32Ty(*Context), 1), + ConstantInt::get(Type::getInt1Ty(*Context), 0) }; + CallInst *MemmoveIntrinsicCall = CallInst::Create(MemmoveIntrinsic, + Args, "", Call); + MemmoveIntrinsicCall->setDebugLoc(Call->getDebugLoc()); + + // libc memmove returns the source pointer, but the LLVM intrinsic doesn't; if + // the return value has actual uses, just replace them with the dest + // argument itself. + Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::rewriteMemsetCall(CallInst *Call) { + Function *MemsetIntrinsic = findMemsetIntrinsic(); + // libc memset has 'int c' for the filler byte, but the LLVM intrinsic uses + // a i8; truncation is required. + TruncInst *ByteTrunc = new TruncInst(Call->getArgOperand(1), + Type::getInt8Ty(*Context), + "trunc_byte", Call); + + const DebugLoc &DLoc = Call->getDebugLoc(); + ByteTrunc->setDebugLoc(DLoc); + + // dest, val, len, align, isvolatile + Value *Args[] = { Call->getArgOperand(0), + ByteTrunc, + Call->getArgOperand(2), + ConstantInt::get(Type::getInt32Ty(*Context), 1), + ConstantInt::get(Type::getInt1Ty(*Context), 0) }; + CallInst *MemsetIntrinsicCall = CallInst::Create(MemsetIntrinsic, + Args, "", Call); + MemsetIntrinsicCall->setDebugLoc(DLoc); + + // libc memset returns the source pointer, but the LLVM intrinsic doesn't; if + // the return value has actual uses, just replace them with the dest + // argument itself. + Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::populateWrapperCommon( + Function *Func, + StringRef FuncName, + RewriteCallFunc CallRewriter, + bool CallCannotReturn, + ...) { + if (!Func->isDeclaration()) { + report_fatal_error(Twine("Expected ") + FuncName + + " to be declared, not defined"); + } + + // Populate the function body with code. + BasicBlock *BB = BasicBlock::Create(*Context, "entry", Func); + + // Collect and name the function arguments. + Function::arg_iterator FuncArgs = Func->arg_begin(); + SmallVector<Value *, 4> Args; + va_list ap; + va_start(ap, CallCannotReturn); + while (true) { + // Iterate over the varargs until a terminated NULL is encountered. + const char *ArgName = va_arg(ap, const char *); + if (!ArgName) + break; + Value *Arg = FuncArgs++; + Arg->setName(ArgName); + Args.push_back(Arg); + } + va_end(ap); + + // Emit a call to self, and then call CallRewriter to rewrite it to the + // intrinsic. This is done in order to keep the call rewriting logic in a + // single place. + CallInst *SelfCall = CallInst::Create(Func, Args, "", BB); + + if (CallCannotReturn) { + new UnreachableInst(*Context, BB); + } else if (Func->getReturnType()->isVoidTy()) { + ReturnInst::Create(*Context, BB); + } else { + ReturnInst::Create(*Context, SelfCall, BB); + } + + (this->*(CallRewriter))(SelfCall); +} + +void RewritePNaClLibraryCalls::populateSetjmpWrapper(Function *SetjmpFunc) { + populateWrapperCommon( + /* Func */ SetjmpFunc, + /* FuncName */ "setjmp", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteSetjmpCall, + /* CallCannotReturn */ false, + /* ... 
*/ "env", NULL); +} + +void RewritePNaClLibraryCalls::populateLongjmpWrapper(Function *LongjmpFunc) { + populateWrapperCommon( + /* Func */ LongjmpFunc, + /* FuncName */ "longjmp", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteLongjmpCall, + /* CallCannotReturn */ true, + /* ... */ "env", "val", NULL); +} + +void RewritePNaClLibraryCalls::populateMemcpyWrapper(Function *MemcpyFunc) { + populateWrapperCommon( + /* Func */ MemcpyFunc, + /* FuncName */ "memcpy", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemcpyCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "src", "len", NULL); +} + +void RewritePNaClLibraryCalls::populateMemmoveWrapper(Function *MemmoveFunc) { + populateWrapperCommon( + /* Func */ MemmoveFunc, + /* FuncName */ "memmove", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemmoveCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "src", "len", NULL); +} + +void RewritePNaClLibraryCalls::populateMemsetWrapper(Function *MemsetFunc) { + populateWrapperCommon( + /* Func */ MemsetFunc, + /* FuncName */ "memset", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemsetCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "val", "len", NULL); +} + +Function *RewritePNaClLibraryCalls::findSetjmpIntrinsic() { + if (!SetjmpIntrinsic) { + SetjmpIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::nacl_setjmp); + } + return SetjmpIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findLongjmpIntrinsic() { + if (!LongjmpIntrinsic) { + LongjmpIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::nacl_longjmp); + } + return LongjmpIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemcpyIntrinsic() { + if (!MemcpyIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context) }; + MemcpyIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memcpy, Tys); + } + return MemcpyIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemmoveIntrinsic() { + if (!MemmoveIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context) }; + MemmoveIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memmove, Tys); + } + return MemmoveIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemsetIntrinsic() { + if (!MemsetIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), Type::getInt32Ty(*Context) }; + MemsetIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memset, Tys); + } + return MemsetIntrinsic; +} + +ModulePass *llvm::createRewritePNaClLibraryCallsPass() { + return new RewritePNaClLibraryCalls(); +} diff --git a/lib/Transforms/NaCl/StripAttributes.cpp b/lib/Transforms/NaCl/StripAttributes.cpp new file mode 100644 index 0000000000..fb3a080e84 --- /dev/null +++ b/lib/Transforms/NaCl/StripAttributes.cpp @@ -0,0 +1,237 @@ +//===- StripAttributes.cpp - Remove attributes not supported by PNaCl------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass strips out attributes that are not supported by PNaCl's +// stable ABI. Currently, this strips out: +// +// * Function and argument attributes from functions and function +// calls. +// * Calling conventions from functions and function calls. +// * The "align" attribute on functions. 
+// * The alignment argument of memcpy/memmove/memset intrinsic calls. +// * The "unnamed_addr" attribute on functions and global variables. +// * The distinction between "internal" and "private" linkage. +// * "protected" and "internal" visibility of functions and globals. +// * The arithmetic attributes "nsw", "nuw" and "exact". +// * It reduces the set of possible "align" attributes on memory +// accesses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can modify attributes of global + // variables. + class StripAttributes : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + StripAttributes() : ModulePass(ID) { + initializeStripAttributesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char StripAttributes::ID = 0; +INITIALIZE_PASS(StripAttributes, "nacl-strip-attributes", + "Strip out attributes that are not part of PNaCl's ABI", + false, false) + +// Most attributes are just hints which can safely be removed. A few +// attributes can break programs if removed, so check all attributes +// before removing them, in case LLVM adds new attributes. +static void CheckAttributes(AttributeSet Attrs) { + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + switch (Attr->getKindAsEnum()) { + // The following attributes can affect calling conventions. + // Rather than complaining, we just strip these out. + // ExpandSmallArguments should have rendered SExt/ZExt + // meaningless since the function arguments will be at least + // 32-bit. + case Attribute::InReg: + case Attribute::SExt: + case Attribute::ZExt: + // These attributes influence ABI decisions that should not be + // visible to PNaCl pexes. + case Attribute::NonLazyBind: // Only relevant to dynamic linking. + case Attribute::NoRedZone: + case Attribute::StackAlignment: + + // The following attributes are just hints, which can be + // safely removed. + case Attribute::AlwaysInline: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoDuplicate: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NoReturn: + case Attribute::OptimizeForSize: + case Attribute::ReadNone: + case Attribute::ReadOnly: + + // PNaCl does not support -fstack-protector in the translator. + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + // PNaCl does not support ASan in the translator. + case Attribute::SanitizeAddress: + case Attribute::SanitizeThread: + case Attribute::SanitizeMemory: + + // The Language References cites setjmp() as an example of a + // function which returns twice, and says ReturnsTwice is + // necessary to disable optimizations such as tail calls. + // However, in the PNaCl ABI, setjmp() is an intrinsic, and + // user-defined functions are not allowed to return twice. 
+ case Attribute::ReturnsTwice: + + // NoUnwind is not a hint if it causes unwind info to be + // omitted, since this will prevent C++ exceptions from + // propagating. In the future, when PNaCl supports zero-cost + // C++ exception handling using unwind info, we might allow + // NoUnwind and UWTable. Alternatively, we might continue to + // disallow them, and just generate unwind info for all + // functions. + case Attribute::NoUnwind: + case Attribute::UWTable: + break; + + case Attribute::ByVal: + case Attribute::StructRet: + case Attribute::Alignment: + Attrs.dump(); + report_fatal_error( + "Attribute should already have been removed by ExpandByVal"); + + case Attribute::Naked: + case Attribute::Nest: + Attrs.dump(); + report_fatal_error("Unsupported attribute"); + + default: + Attrs.dump(); + report_fatal_error("Unrecognized attribute"); + } + } + } +} + +void stripGlobalValueAttrs(GlobalValue *GV) { + // In case source code uses __attribute__((visibility("hidden"))) or + // __attribute__((visibility("protected"))), strip these attributes. + GV->setVisibility(GlobalValue::DefaultVisibility); + + GV->setUnnamedAddr(false); + + // Convert "private" linkage to "internal" to reduce the number of + // linkage types that need to be represented in PNaCl's wire format. + // + // We convert "private" to "internal" rather than vice versa because + // "private" symbols are omitted from the nexe's symbol table, which + // would get in the way of debugging when an unstripped pexe is + // translated offline. + if (GV->getLinkage() == GlobalValue::PrivateLinkage) + GV->setLinkage(GlobalValue::InternalLinkage); +} + +static unsigned normalizeAlignment(DataLayout *DL, unsigned Alignment, + Type *Ty, bool IsAtomic) { + unsigned MaxAllowed = 1; + if (Ty->isDoubleTy() || Ty->isFloatTy() || IsAtomic) + MaxAllowed = DL->getTypeAllocSize(Ty); + // If the alignment is set to 0, this means "use the default + // alignment for the target", which we fill in explicitly. + if (Alignment == 0 || Alignment >= MaxAllowed) + return MaxAllowed; + return 1; +} + +void stripFunctionAttrs(DataLayout *DL, Function *Func) { + CheckAttributes(Func->getAttributes()); + Func->setAttributes(AttributeSet()); + Func->setCallingConv(CallingConv::C); + Func->setAlignment(0); + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + CallSite Call(Inst); + if (Call) { + CheckAttributes(Call.getAttributes()); + Call.setAttributes(AttributeSet()); + Call.setCallingConv(CallingConv::C); + + // Set memcpy(), memmove() and memset() to use pessimistic + // alignment assumptions. 
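For a concrete feel of what normalizeAlignment() above does, the rule can be restated as a tiny standalone function with a few spot checks. This is an illustrative sketch only; the helper name and the assumed 4-byte float / 8-byte double allocation sizes are not part of the change.

#include <cassert>

// Mirrors normalizeAlignment(): only float, double, and atomic accesses may
// keep their natural alignment; every other access is demoted to align 1.
static unsigned normalizedAlign(unsigned Align, unsigned AllocSize,
                                bool FloatDoubleOrAtomic) {
  unsigned MaxAllowed = FloatDoubleOrAtomic ? AllocSize : 1;
  if (Align == 0 || Align >= MaxAllowed)   // 0 means "use the target default"
    return MaxAllowed;
  return 1;
}

int main() {
  assert(normalizedAlign(0, 4, true) == 4);   // float, default alignment
  assert(normalizedAlign(16, 8, true) == 8);  // double, over-aligned
  assert(normalizedAlign(2, 8, true) == 1);   // double, under-aligned
  assert(normalizedAlign(8, 4, false) == 1);  // ordinary i32 load/store
  return 0;
}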
+ if (MemIntrinsic *MemOp = dyn_cast<MemIntrinsic>(Inst)) { + Type *AlignTy = MemOp->getAlignmentCst()->getType(); + MemOp->setAlignment(ConstantInt::get(AlignTy, 1)); + } + } else if (OverflowingBinaryOperator *Op = + dyn_cast<OverflowingBinaryOperator>(Inst)) { + cast<BinaryOperator>(Op)->setHasNoUnsignedWrap(false); + cast<BinaryOperator>(Op)->setHasNoSignedWrap(false); + } else if (PossiblyExactOperator *Op = + dyn_cast<PossiblyExactOperator>(Inst)) { + cast<BinaryOperator>(Op)->setIsExact(false); + } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + Load->setAlignment(normalizeAlignment( + DL, Load->getAlignment(), + Load->getType(), + Load->isAtomic())); + } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { + Store->setAlignment(normalizeAlignment( + DL, Store->getAlignment(), + Store->getValueOperand()->getType(), + Store->isAtomic())); + } + } + } +} + +bool StripAttributes::runOnModule(Module &M) { + DataLayout DL(&M); + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + // Avoid stripping attributes from intrinsics because the + // constructor for Functions just adds them back again. It would + // be confusing if the attributes were sometimes present on + // intrinsics and sometimes not. + if (!Func->isIntrinsic()) { + stripGlobalValueAttrs(Func); + stripFunctionAttrs(&DL, Func); + } + } + for (Module::global_iterator GV = M.global_begin(), E = M.global_end(); + GV != E; ++GV) { + stripGlobalValueAttrs(GV); + } + return true; +} + +ModulePass *llvm::createStripAttributesPass() { + return new StripAttributes(); +} diff --git a/lib/Transforms/NaCl/StripMetadata.cpp b/lib/Transforms/NaCl/StripMetadata.cpp new file mode 100644 index 0000000000..8461e7efbe --- /dev/null +++ b/lib/Transforms/NaCl/StripMetadata.cpp @@ -0,0 +1,92 @@ +//===- StripMetadata.cpp - Strip non-stable non-debug metadata ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripMetadata transformation strips instruction attachment +// metadata, such as !tbaa and !prof metadata. +// TODO: Strip NamedMetadata too. +// +// It does not strip debug metadata. Debug metadata is used by debug +// intrinsic functions and calls to those intrinsic functions. Use the +// -strip-debug or -strip pass to strip that instead. +// +// The goal of this pass is to reduce bitcode ABI surface area. +// We don't know yet which kind of metadata is considered stable. +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class StripMetadata : public ModulePass { + public: + static char ID; + explicit StripMetadata() : ModulePass(ID) { + initializeStripMetadataPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + }; +} + +char StripMetadata::ID = 0; +INITIALIZE_PASS(StripMetadata, "strip-metadata", + "Strip all non-stable non-debug metadata from a module.", + false, false) + +ModulePass *llvm::createStripMetadataPass() { + return new StripMetadata(); +} + +static bool IsWhitelistedMetadata(const NamedMDNode *node) { + // Leave debug metadata to the -strip-debug pass. 
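As a rough orientation for how the NaCl passes in this change fit together, a hedged sketch of driver code (assumed, not added by this patch; the real pnacl driver may order or gate these differently) would schedule them as module passes:

#include "llvm/PassManager.h"
#include "llvm/Transforms/NaCl.h"

static void addPNaClCleanupPasses(llvm::PassManager &PM) {
  PM.add(llvm::createRewritePNaClLibraryCallsPass()); // setjmp/longjmp/mem* -> intrinsics
  PM.add(llvm::createStripAttributesPass());          // drop non-ABI attributes and calling conventions
  PM.add(llvm::createStripMetadataPass());            // drop !tbaa, !prof and other non-debug metadata
}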
+ return node->getName().startswith("llvm.dbg."); +} + +static bool DoStripMetadata(Module &M) { + bool Changed = false; + + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) { + for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) { + SmallVector<std::pair<unsigned, MDNode *>, 8> InstMeta; + // Let the debug metadata be stripped by the -strip-debug pass. + BI->getAllMetadataOtherThanDebugLoc(InstMeta); + for (size_t i = 0; i < InstMeta.size(); ++i) { + BI->setMetadata(InstMeta[i].first, NULL); + Changed = true; + } + } + } + } + + // Strip unsupported named metadata. + SmallVector<NamedMDNode*, 8> ToErase; + for (Module::NamedMDListType::iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + if (!IsWhitelistedMetadata(I)) + ToErase.push_back(I); + } + for (size_t i = 0; i < ToErase.size(); ++i) + M.eraseNamedMetadata(ToErase[i]); + + return Changed; +} + +bool StripMetadata::runOnModule(Module &M) { + return DoStripMetadata(M); +} diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 64df089e1b..46a77daffb 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -37,7 +37,11 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); - + // @LOCALMOD-BEGIN + // Copy all of the dependent libraries over. + for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) + New->addLibrary(*I); + // @LOCALMOD-END // Loop over all of the global variables, making corresponding globals in the // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6bea2ddd20..cadec21c50 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1678,6 +1678,7 @@ class LibCallSimplifierImpl { const LibCallSimplifier *LCS; bool UnsafeFPShrink; + // Math library call optimizations. CosOpt Cos; PowOpt Pow; @@ -1922,6 +1923,14 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) { Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { LibCallOptimization *LCO = lookupOptimization(CI); + + // @LOCALMOD-BEGIN + Function *Caller = CI->getParent()->getParent(); + LibFunc::Func F = LibFunc::NumLibFuncs; + // Don't modify the implementation of known library functions + if (TLI->getLibFunc(Caller->getName(), F)) + return 0; + // @LOCALMOD-END if (LCO) { IRBuilder<> Builder(CI); return LCO->optimizeCall(CI, TD, TLI, LCS, Builder); @@ -1929,6 +1938,7 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) { return 0; } + LibCallSimplifier::LibCallSimplifier(const DataLayout *TD, const TargetLibraryInfo *TLI, bool UnsafeFPShrink) { diff --git a/lib/Wrap/CMakeLists.txt b/lib/Wrap/CMakeLists.txt new file mode 100644 index 0000000000..da44a6bb56 --- /dev/null +++ b/lib/Wrap/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMWrap + bitcode_wrapperer.cpp + file_wrapper_input.cpp + file_wrapper_output.cpp + wrapper_output.cpp +)
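The @LOCALMOD in SimplifyLibCalls.cpp above ("don't modify the implementation of known library functions") is easiest to see with a hypothetical example of what it prevents when libc/libm themselves are built by this toolchain; none of the code below is from the patch:

// If libm's own exp2() were written in terms of pow(), the usual
// pow(2.0, x) -> exp2(x) simplification would rewrite it into a call to
// itself, i.e. infinite recursion. Checking the caller's name against the
// known-LibFunc table, as optimizeCall() now does, sidesteps that.
extern "C" double pow(double, double);

extern "C" double exp2(double x) {
  return pow(2.0, x);
}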
\ No newline at end of file diff --git a/lib/Wrap/LLVMBuild.txt b/lib/Wrap/LLVMBuild.txt new file mode 100644 index 0000000000..8750711338 --- /dev/null +++ b/lib/Wrap/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/Wrap/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Wrap +parent = Libraries diff --git a/lib/Wrap/Makefile b/lib/Wrap/Makefile new file mode 100644 index 0000000000..79aa2b3531 --- /dev/null +++ b/lib/Wrap/Makefile @@ -0,0 +1,14 @@ +##===- lib/Linker/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMWrap +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/Wrap/bitcode_wrapperer.cpp b/lib/Wrap/bitcode_wrapperer.cpp new file mode 100644 index 0000000000..eeb2825793 --- /dev/null +++ b/lib/Wrap/bitcode_wrapperer.cpp @@ -0,0 +1,355 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +#include "llvm/Wrap/bitcode_wrapperer.h" + +#include <stdio.h> +#include <sys/stat.h> + +using std::vector; + +// The number of bytes in a 32 bit integer. +static const uint32_t kWordSize = 4; + +// Number of LLVM-defined fixed fields in the header. +static const uint32_t kLLVMFields = 4; + +// Total number of fixed fields in the header. +static const uint32_t kFixedFields = 7; + +// The magic number that must exist for bitcode wrappers. +static const uint32_t kWrapperMagicNumber = 0x0B17C0DE; + +// The version number associated with a wrapper file. +// Note: llvm currently only allows the value 0. When this changes, +// we should consider making this a command line option. +static const uint32_t kLLVMVersionNumber = 0; + +// Fields defined by Android bitcode header. +static const uint32_t kAndroidHeaderVersion = 0; +static const uint32_t kAndroidTargetAPI = 0; +static const uint32_t kAndroidDefaultCompilerVersion = 0; +static const uint32_t kAndroidDefaultOptimizationLevel = 3; + +// PNaCl bitcode version number. +static const uint32_t kPnaclBitcodeVersion = 0; + +// Max size for variable fields. Currently only used for writing them +// out to files (the parsing works for arbitrary sizes). 
+static const size_t kMaxVariableFieldSize = 256; + +BitcodeWrapperer::BitcodeWrapperer(WrapperInput* infile, WrapperOutput* outfile) + : infile_(infile), + outfile_(outfile), + buffer_size_(0), + cursor_(0), + infile_at_eof_(false), + infile_bc_offset_(0), + wrapper_bc_offset_(0), + wrapper_bc_size_(0), + android_header_version_(kAndroidHeaderVersion), + android_target_api_(kAndroidTargetAPI), + pnacl_bc_version_(0), + error_(false) { + buffer_.resize(kBitcodeWrappererBufferSize); + if (IsInputBitcodeWrapper()) { + ParseWrapperHeader(); + } else if (IsInputBitcodeFile()) { + wrapper_bc_offset_ = kWordSize * kFixedFields; + wrapper_bc_size_ = GetInFileSize(); + } else { + fprintf(stderr, "Error: input file is not a bitcode file.\n"); + error_ = true; + } +} + +BitcodeWrapperer::~BitcodeWrapperer() { + for(size_t i = 0; i < variable_field_data_.size(); i++) { + delete [] variable_field_data_[i]; + } +} + + +void BitcodeWrapperer::ClearBuffer() { + buffer_size_ = 0; + cursor_ = 0; + infile_at_eof_ = false; +} + +bool BitcodeWrapperer::Seek(uint32_t pos) { + if (infile_ != NULL && infile_->Seek(pos)) { + ClearBuffer(); + return true; + } + return false; +} + +bool BitcodeWrapperer::CanReadWord() { + if (GetBufferUnreadBytes() < kWordSize) { + FillBuffer(); + return GetBufferUnreadBytes() >= kWordSize; + } else { + return true; + } +} + +void BitcodeWrapperer::FillBuffer() { + if (cursor_ > 0) { + // Before filling, move any remaining bytes to the + // front of the buffer. This allows us to assume + // that after the call to FillBuffer, readable + // text is contiguous. + if (cursor_ < buffer_size_) { + size_t i = 0; + while (cursor_ < buffer_size_) { + buffer_[i++] = buffer_[cursor_++]; + } + cursor_ = 0; + buffer_size_ = i; + } + } else { + // Assume the buffer contents have been used, + // and we want to completely refill it. + buffer_size_ = 0; + } + + // If we don't have an input, we can't refill the buffer at all. + if (infile_ == NULL) { + return; + } + + // Now fill in remaining space. + size_t needed = buffer_.size() - buffer_size_; + + while (buffer_.size() > buffer_size_) { + int actually_read = infile_->Read(&buffer_[buffer_size_], needed); + if (infile_->AtEof()) { + infile_at_eof_ = true; + } + if (actually_read) { + buffer_size_ += actually_read; + needed -= actually_read; + } else if (infile_at_eof_) { + break; + } + } +} + +bool BitcodeWrapperer::ReadWord(uint32_t& word) { + if (!CanReadWord()) return false; + word = (((uint32_t) BufferLookahead(0)) << 0) + | (((uint32_t) BufferLookahead(1)) << 8) + | (((uint32_t) BufferLookahead(2)) << 16) + | (((uint32_t) BufferLookahead(3)) << 24); + cursor_ += kWordSize; + return true; +} + +bool BitcodeWrapperer::WriteWord(uint32_t value) { + uint8_t buffer[kWordSize]; + buffer[3] = (value >> 24) & 0xFF; + buffer[2] = (value >> 16) & 0xFF; + buffer[1] = (value >> 8) & 0xFF; + buffer[0] = (value >> 0) & 0xFF; + return outfile_->Write(buffer, kWordSize); +} + +bool BitcodeWrapperer::WriteVariableFields() { + // This buffer may have to be bigger if we start using the fields + // for larger things. 
+ uint8_t buffer[kMaxVariableFieldSize]; + for (vector<BCHeaderField>::iterator it = header_fields_.begin(); + it != header_fields_.end(); ++it) { + if (!it->Write(buffer, kMaxVariableFieldSize) || + !outfile_->Write(buffer, it->GetTotalSize())) { + return false; + } + } + return true; +} + +bool BitcodeWrapperer::ParseWrapperHeader() { + // Make sure LLVM-defined fields have been parsed + if (!IsInputBitcodeWrapper()) return false; + // Check the android/pnacl fields + if (!ReadWord(android_header_version_) || + !ReadWord(android_target_api_) || !ReadWord(pnacl_bc_version_)) { + fprintf(stderr, "Error: file not long enough to contain header\n"); + return false; + } + if (pnacl_bc_version_ != kPnaclBitcodeVersion) { + fprintf(stderr, "Error: bad PNaCl Bitcode version\n"); + return false; + } + int field_data_total = wrapper_bc_offset_ - kWordSize * kFixedFields; + if (field_data_total > 0) { + // Read in the variable fields. We need to allocate space for the data. + int field_data_read = 0; + + while (field_data_read < field_data_total) { + FillBuffer(); + size_t buffer_needed = BCHeaderField::GetDataSizeFromSerialized( + &buffer_[cursor_]); + if (buffer_needed > buffer_.size()) { + buffer_.resize(buffer_needed + + sizeof(BCHeaderField::FixedSubfield) * 2); + FillBuffer(); + } + variable_field_data_.push_back(new uint8_t[buffer_needed]); + + BCHeaderField field(BCHeaderField::kInvalid, 0, + variable_field_data_.back()); + field.Read(&buffer_[cursor_], buffer_size_); + header_fields_.push_back(field); + size_t field_size = field.GetTotalSize(); + cursor_ += field_size; + field_data_read += field_size; + if (field_data_read > field_data_total) { + // We read too much data, the header is corrupted + fprintf(stderr, "Error: raw bitcode offset inconsistent with " + "variable field data\n"); + return false; + } + } + Seek(0); + } + return true; +} + +bool BitcodeWrapperer::IsInputBitcodeWrapper() { + ResetCursor(); + // First make sure that there are enough words (LLVM header) + // to peek at. + if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) { + FillBuffer(); + if (GetBufferUnreadBytes() < kLLVMFields * kWordSize) return false; + } + + // Now make sure the magic number is right. + uint32_t first_word; + if ((!ReadWord(first_word)) || + (kWrapperMagicNumber != first_word)) return false; + + // Make sure the version is right. + uint32_t second_word; + if ((!ReadWord(second_word)) || + (kLLVMVersionNumber != second_word)) return false; + + // Make sure that the offset and size (for llvm) is defined. + uint32_t bc_offset; + uint32_t bc_size; + if (ReadWord(bc_offset) && + ReadWord(bc_size)) { + // Before returning, save the extracted values. + wrapper_bc_offset_ = bc_offset; + infile_bc_offset_ = bc_offset; + wrapper_bc_size_ = bc_size; + return true; + } + // If reached, unable to read wrapped header. + return false; +} + +bool BitcodeWrapperer::IsInputBitcodeFile() { + ResetCursor(); + // First make sure that there are four bytes to peek at. + if (GetBufferUnreadBytes() < kWordSize) { + FillBuffer(); + if (GetBufferUnreadBytes() < kWordSize) return false; + } + // If reached, Check if first 4 bytes match bitcode + // file magic number. + return (BufferLookahead(0) == 'B') && + (BufferLookahead(1) == 'C') && + (BufferLookahead(2) == 0xc0) && + (BufferLookahead(3) == 0xde); +} + +bool BitcodeWrapperer::BufferCopyInToOut(uint32_t size) { + while (size > 0) { + // Be sure buffer is non-empty before writing. 
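For reference while reading IsInputBitcodeWrapper() and ParseWrapperHeader() above: the fixed part of the wrapper header is kFixedFields (seven) little-endian 32-bit words -- magic, LLVM wrapper version, raw-bitcode offset, raw-bitcode size, Android header version, Android target API, and PNaCl bitcode version -- and any variable-length fields then fill the gap up to the bitcode offset. A minimal sketch of a writer for just that fixed prefix, using the default field values (illustrative only, not part of the change):

#include <stdint.h>
#include <stdio.h>

static void WriteFixedWrapperHeader(FILE *Out, uint32_t BCOffset,
                                    uint32_t BCSize) {
  const uint32_t Words[7] = {
    0x0B17C0DE,  // wrapper magic
    0,           // LLVM wrapper version
    BCOffset,    // offset of the raw bitcode within the file
    BCSize,      // size of the raw bitcode in bytes
    0,           // Android header version
    0,           // Android target API
    0,           // PNaCl bitcode version
  };
  for (int I = 0; I < 7; ++I) {
    // Least-significant byte first, matching WriteWord() above.
    uint8_t Bytes[4] = { uint8_t(Words[I] >> 0), uint8_t(Words[I] >> 8),
                         uint8_t(Words[I] >> 16), uint8_t(Words[I] >> 24) };
    fwrite(Bytes, 1, 4, Out);
  }
}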
+ if (0 == buffer_size_) { + FillBuffer(); + if (0 == buffer_size_) { + return false; + } + } + // copy the buffer to the output file. + size_t block = (buffer_size_ < size) ? buffer_size_ : size; + if (!outfile_->Write(&buffer_[cursor_], block)) return false; + size -= block; + buffer_size_ = 0; + } + // Be sure that there isn't more bytes on the input stream. + FillBuffer(); + return buffer_size_ == 0; +} + +void BitcodeWrapperer::AddHeaderField(BCHeaderField* field) { + vector<BCHeaderField>::iterator it = header_fields_.begin(); + for (; it != header_fields_.end(); ++it) { + // If this field is the same as an existing one, overwrite it. + if (it->getID() == field->getID()) { + wrapper_bc_offset_ += (field->GetTotalSize() - it->GetTotalSize()); + *it = *field; + break; + } + } + if (it == header_fields_.end()) { // there was no match, add a new field + header_fields_.push_back(*field); + wrapper_bc_offset_ += field->GetTotalSize(); + } +} + +bool BitcodeWrapperer::WriteBitcodeWrapperHeader() { + return + // Note: This writes out the 4 word header required by llvm wrapped + // bitcode. + WriteWord(kWrapperMagicNumber) && + WriteWord(kLLVMVersionNumber) && + WriteWord(wrapper_bc_offset_) && + WriteWord(wrapper_bc_size_) && + // 2 fixed fields defined by Android + WriteWord(android_header_version_) && + WriteWord(android_target_api_) && + // PNaClBitcode version + WriteWord(kPnaclBitcodeVersion) && + // Common variable-length fields + WriteVariableFields(); +} + +void BitcodeWrapperer::PrintWrapperHeader() { + if (error_) { + fprintf(stderr, "Error condition exists: the following" + "data may not be reliable\n"); + } + fprintf(stderr, "Wrapper magic:\t\t%x\n", kWrapperMagicNumber); + fprintf(stderr, "LLVM Bitcode version:\t%d\n", kLLVMVersionNumber); + fprintf(stderr, "Raw bitcode offset:\t%d\n", wrapper_bc_offset_); + fprintf(stderr, "Raw bitcode size:\t%d\n", wrapper_bc_size_); + fprintf(stderr, "Android header version:\t%d\n", android_header_version_); + fprintf(stderr, "Android target API:\t%d\n", android_target_api_); + fprintf(stderr, "PNaCl bitcode version:\t%d\n", kPnaclBitcodeVersion); + for (size_t i = 0; i < header_fields_.size(); i++) header_fields_[i].Print(); +} + +bool BitcodeWrapperer::GenerateWrappedBitcodeFile() { + if (!error_ && + WriteBitcodeWrapperHeader() && + Seek(infile_bc_offset_) && + BufferCopyInToOut(wrapper_bc_size_)) { + off_t dangling = wrapper_bc_size_ & 3; + if (dangling) { + return outfile_->Write((const uint8_t*) "\0\0\0\0", 4 - dangling); + } + return true; + } + return false; +} + +bool BitcodeWrapperer::GenerateRawBitcodeFile() { + return !error_ && Seek(infile_bc_offset_) && + BufferCopyInToOut(wrapper_bc_size_); +} diff --git a/lib/Wrap/file_wrapper_input.cpp b/lib/Wrap/file_wrapper_input.cpp new file mode 100644 index 0000000000..fc592e0246 --- /dev/null +++ b/lib/Wrap/file_wrapper_input.cpp @@ -0,0 +1,53 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. 
+ */ + +#include <sys/stat.h> +#include <stdlib.h> + +#include "llvm/Wrap/file_wrapper_input.h" + +FileWrapperInput::FileWrapperInput(const std::string& name) : + _name(name), _at_eof(false), _size_found(false), _size(0) { + _file = fopen(name.c_str(), "rb"); + if (NULL == _file) { + fprintf(stderr, "Unable to open: %s\n", name.c_str()); + exit(1); + } +} + +FileWrapperInput::~FileWrapperInput() { + fclose(_file); +} + +size_t FileWrapperInput::Read(uint8_t* buffer, size_t wanted) { + size_t found = fread((char*) buffer, 1, wanted, _file); + if (feof(_file) || ferror(_file)) { + _at_eof = true; + } + return found; +} + +bool FileWrapperInput::AtEof() { + return _at_eof; +} + +off_t FileWrapperInput::Size() { + if (_size_found) return _size; + struct stat st; + if (0 == stat(_name.c_str(), &st)) { + _size_found = true; + _size = st.st_size; + return _size; + } else { + fprintf(stderr, "Unable to compute file size: %s\n", _name.c_str()); + exit(1); + } + // NOT REACHABLE. + return 0; +} + +bool FileWrapperInput::Seek(uint32_t pos) { + return 0 == fseek(_file, (long) pos, SEEK_SET); +} diff --git a/lib/Wrap/file_wrapper_output.cpp b/lib/Wrap/file_wrapper_output.cpp new file mode 100644 index 0000000000..f9f126868d --- /dev/null +++ b/lib/Wrap/file_wrapper_output.cpp @@ -0,0 +1,37 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +#include "llvm/Wrap/file_wrapper_output.h" +#include <stdlib.h> + + +FileWrapperOutput::FileWrapperOutput(const std::string& name) + : _name(name) { + _file = fopen(name.c_str(), "wb"); + if (NULL == _file) { + fprintf(stderr, "Unable to open: %s\n", name.c_str()); + exit(1); + } +} + +FileWrapperOutput::~FileWrapperOutput() { + fclose(_file); +} + +bool FileWrapperOutput::Write(uint8_t byte) { + return EOF != fputc(byte, _file); +} + +bool FileWrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) { + if (!buffer) { + return false; + } + + if (buffer_size > 0) { + return buffer_size == fwrite(buffer, 1, buffer_size, _file); + } else { + return true; + } +} diff --git a/lib/Wrap/wrapper_output.cpp b/lib/Wrap/wrapper_output.cpp new file mode 100644 index 0000000000..493f29efa8 --- /dev/null +++ b/lib/Wrap/wrapper_output.cpp @@ -0,0 +1,9 @@ +#include "llvm/Wrap/wrapper_output.h" + +bool WrapperOutput::Write(const uint8_t* buffer, size_t buffer_size) { + // Default implementation that uses the byte write routine. + for (size_t i = 0; i < buffer_size; ++i) { + if (!Write(buffer[i])) return false; + } + return true; +} diff --git a/projects/sample/autoconf/config.sub b/projects/sample/autoconf/config.sub index 9d22c1e52e..488871aed7 100755 --- a/projects/sample/autoconf/config.sub +++ b/projects/sample/autoconf/config.sub @@ -132,6 +132,10 @@ case $maybe_os in os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; + nacl) + os=-nacl + basic_machine=pnacl-unknown + ;; *) basic_machine=`echo $1 | sed 's/-[^-]*$//'` if [ $basic_machine != $1 ] @@ -348,6 +352,8 @@ case $basic_machine in i*86 | x86_64) basic_machine=$basic_machine-pc ;; + pnacl-*) + ;; # Object if more than one company name word. 
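Since the new lib/Wrap code above is a plain library rather than a registered tool, a short usage sketch may help; this is assumed driver code for a hypothetical bc-wrap-style utility, not something this change adds:

#include <stdio.h>

#include "llvm/Wrap/bitcode_wrapperer.h"
#include "llvm/Wrap/file_wrapper_input.h"
#include "llvm/Wrap/file_wrapper_output.h"

int main(int argc, char **argv) {
  if (argc != 3) {
    fprintf(stderr, "usage: %s <input.bc> <wrapped.bc>\n", argv[0]);
    return 1;
  }
  FileWrapperInput In(argv[1]);
  FileWrapperOutput Out(argv[2]);
  BitcodeWrapperer Wrapperer(&In, &Out);
  // Copies the input through, prepending (or refreshing) the wrapper header.
  return Wrapperer.GenerateWrappedBitcodeFile() ? 0 : 1;
}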
*-*-*) echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 @@ -1366,6 +1372,8 @@ case $os in ;; esac ;; + -nacl) + ;; -nto-qnx*) ;; -nto*) diff --git a/projects/sample/configure b/projects/sample/configure index a2c70c626d..8484a2ae3c 100755 --- a/projects/sample/configure +++ b/projects/sample/configure @@ -3690,6 +3690,11 @@ else llvm_cv_no_link_all_option="-Wl,--no-whole-archive" llvm_cv_os_type="GNU" llvm_cv_platform_type="Unix" ;; + *-*-nacl*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="NativeClient" + llvm_cv_platform_type="Unix" ;; *-*-solaris*) llvm_cv_link_all_option="-Wl,-z,allextract" llvm_cv_no_link_all_option="-Wl,-z,defaultextract" diff --git a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll index 60bc6a62f5..05a6bab99d 100644 --- a/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll +++ b/test/CodeGen/ARM/fast-isel-GEP-coalesce.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB %struct.A = type { i32, [2 x [2 x i32]], i8, [3 x [3 x [3 x i32]]] } @@ -26,8 +27,8 @@ entry: ; THUMB: t2 %addr = alloca i32*, align 4 store i32* getelementptr inbounds ([3 x [3 x %struct.A]]* @A, i32 0, i32 2, i32 2, i32 3, i32 1, i32 2, i32 2), i32** %addr, align 4 -; ARM: movw r1, #1148 -; ARM: add r0, r0, r1 +; ARM: movw [[R:r[0-9]+]], #1148 +; ARM: add r0, r{{[0-9]+}}, [[R]] ; THUMB: addw r0, r0, #1148 %0 = load i32** %addr, align 4 ret i32* %0 diff --git a/test/CodeGen/ARM/fast-isel-align.ll b/test/CodeGen/ARM/fast-isel-align.ll new file mode 100644 index 0000000000..4e28a10cd1 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-align.ll @@ -0,0 +1,144 @@ +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-linux-gnueabi | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-nacl | FileCheck %s 
--check-prefix=ARM-STRICT-ALIGN + +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN +; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -arm-no-strict-align -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-unknown-unknown | FileCheck %s --check-prefix=ARM-STRICT-ALIGN +; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-unknown-unknown | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN + +; Check unaligned stores +%struct.anon = type <{ float }> + +@a = common global %struct.anon* null, align 4 + +define void @unaligned_store(float %x, float %y) nounwind { +entry: +; ARM: @unaligned_store +; ARM: vmov r1, s0 +; ARM: str r1, [r0] + +; THUMB: @unaligned_store +; THUMB: vmov r1, s0 +; THUMB: str r1, [r0] + + %add = fadd float %x, %y + %0 = load %struct.anon** @a, align 4 + %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0 + store float %add, float* %x1, align 1 + ret void +} + +; Doublewords require only word-alignment. +; rdar://10528060 +%struct.anon.0 = type { double } + +@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4 + +define void @word_aligned_f64_store(double %a, double %b) nounwind { +entry: +; ARM: @word_aligned_f64_store +; THUMB: @word_aligned_f64_store + %add = fadd double %a, %b + store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4 +; ARM: vstr d16, [r0] +; THUMB: vstr d16, [r0] + ret void +} + +; Check unaligned loads of floats +%class.TAlignTest = type <{ i16, float }> + +define zeroext i1 @unaligned_f32_load(%class.TAlignTest* %this) nounwind align 2 { +entry: +; ARM: @unaligned_f32_load +; THUMB: @unaligned_f32_load + %0 = alloca %class.TAlignTest*, align 4 + store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4 + %1 = load %class.TAlignTest** %0 + %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1 + %3 = load float* %2, align 1 + %4 = fcmp une float %3, 0.000000e+00 +; ARM: ldr r[[R:[0-9]+]], [r0, #2] +; ARM: vmov s0, r[[R]] +; ARM: vcmpe.f32 s0, #0 +; THUMB: ldr.w r[[R:[0-9]+]], [r0, #2] +; THUMB: vmov s0, r[[R]] +; THUMB: vcmpe.f32 s0, #0 + ret i1 %4 +} + +define void @unaligned_i16_store(i16 %x, i16* %y) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i16_store +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb + +; THUMB-STRICT-ALIGN: @unaligned_i16_store +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb + + store i16 %x, i16* %y, align 1 + ret void +} + +define i16 @unaligned_i16_load(i16* %x) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i16_load +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb + +; THUMB-STRICT-ALIGN: @unaligned_i16_load +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb + + %0 = load i16* %x, align 1 + ret i16 %0 +} + +define void @unaligned_i32_store(i32 %x, i32* %y) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i32_store +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb +; ARM-STRICT-ALIGN: strb + +; THUMB-STRICT-ALIGN: @unaligned_i32_store +; THUMB-STRICT-ALIGN: 
strb +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb +; THUMB-STRICT-ALIGN: strb + + store i32 %x, i32* %y, align 1 + ret void +} + +define i32 @unaligned_i32_load(i32* %x) nounwind { +entry: +; ARM-STRICT-ALIGN: @unaligned_i32_load +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb +; ARM-STRICT-ALIGN: ldrb + +; THUMB-STRICT-ALIGN: @unaligned_i32_load +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb +; THUMB-STRICT-ALIGN: ldrb + + %0 = load i32* %x, align 1 + ret i32 %0 +} diff --git a/test/CodeGen/ARM/fast-isel-binary.ll b/test/CodeGen/ARM/fast-isel-binary.ll index 723383e04b..31596276f3 100644 --- a/test/CodeGen/ARM/fast-isel-binary.ll +++ b/test/CodeGen/ARM/fast-isel-binary.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Test add with non-legal types diff --git a/test/CodeGen/ARM/fast-isel-br-const.ll b/test/CodeGen/ARM/fast-isel-br-const.ll index 4e6efd2489..293302f572 100644 --- a/test/CodeGen/ARM/fast-isel-br-const.ll +++ b/test/CodeGen/ARM/fast-isel-br-const.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i32 %a, i32 %b) nounwind uwtable ssp { @@ -7,8 +8,8 @@ entry: ; ARM: t1: %x = add i32 %a, %b br i1 1, label %if.then, label %if.else -; THUMB-NOT: b LBB0_1 -; ARM-NOT: b LBB0_1 +; THUMB-NOT: b {{\.?}}LBB0_1 +; ARM-NOT: b {{\.?}}LBB0_1 if.then: ; preds = %entry call void @foo1() @@ -16,8 +17,8 @@ if.then: ; preds = %entry if.else: ; preds = %entry br i1 0, label %if.then2, label %if.else3 -; THUMB: b LBB0_4 -; ARM: b LBB0_4 +; THUMB: b {{\.?}}LBB0_4 +; ARM: b {{\.?}}LBB0_4 if.then2: ; preds = %if.else call void @foo2() @@ -26,8 +27,8 @@ if.then2: ; preds = %if.else if.else3: ; preds = %if.else %y = sub i32 %a, %b br i1 1, label %if.then5, label %if.end -; THUMB-NOT: b LBB0_5 -; ARM-NOT: b LBB0_5 +; THUMB-NOT: b {{\.?}}LBB0_5 +; ARM-NOT: b {{\.?}}LBB0_5 if.then5: ; preds = %if.else3 call void @foo1() diff --git a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll index b6f201728c..da829e929e 100644 --- a/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll +++ b/test/CodeGen/ARM/fast-isel-call-multi-reg-return.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic 
-mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Fast-isel can't handle non-double multi-reg retvals. ; This test just check to make sure we don't hit the assert in FinishCall. diff --git a/test/CodeGen/ARM/fast-isel-call.ll b/test/CodeGen/ARM/fast-isel-call.ll index b6c9098613..55911e5c1c 100644 --- a/test/CodeGen/ARM/fast-isel-call.ll +++ b/test/CodeGen/ARM/fast-isel-call.ll @@ -1,9 +1,16 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP -; RUN: llc < %s -O0 -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -mattr=-vfp2 | FileCheck %s --check-prefix=ARM-NOVFP +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -mattr=-vfp2 | FileCheck %s --check-prefix=THUMB-NOVFP + +; Note that some of these tests assume that relocations are either +; movw/movt or constant pool loads. Different platforms will select +; different approaches. 
define i32 @t0(i1 zeroext %a) nounwind { %1 = zext i1 %a to i32 @@ -88,53 +95,53 @@ declare zeroext i1 @t9(); define i32 @t10(i32 %argc, i8** nocapture %argv) { entry: ; ARM: @t10 -; ARM: movw r0, #0 -; ARM: movw r1, #248 -; ARM: movw r2, #187 -; ARM: movw r3, #28 -; ARM: movw r9, #40 -; ARM: movw r12, #186 -; ARM: uxtb r0, r0 -; ARM: uxtb r1, r1 -; ARM: uxtb r2, r2 -; ARM: uxtb r3, r3 -; ARM: uxtb r9, r9 -; ARM: str r9, [sp] -; ARM: uxtb r9, r12 -; ARM: str r9, [sp, #4] -; ARM: bl _bar +; ARM: movw [[R0:l?r[0-9]*]], #0 +; ARM: movw [[R1:l?r[0-9]*]], #248 +; ARM: movw [[R2:l?r[0-9]*]], #187 +; ARM: movw [[R3:l?r[0-9]*]], #28 +; ARM: movw [[R4:l?r[0-9]*]], #40 +; ARM: movw [[R5:l?r[0-9]*]], #186 +; ARM: uxtb [[R0]], [[R0]] +; ARM: uxtb [[R1]], [[R1]] +; ARM: uxtb [[R2]], [[R2]] +; ARM: uxtb [[R3]], [[R3]] +; ARM: uxtb [[R4]], [[R4]] +; ARM: str [[R4]], [sp] +; ARM: uxtb [[R4]], [[R5]] +; ARM: str [[R4]], [sp, #4] +; ARM: bl {{_?}}bar ; ARM-LONG: @t10 -; ARM-LONG: movw lr, :lower16:L_bar$non_lazy_ptr -; ARM-LONG: movt lr, :upper16:L_bar$non_lazy_ptr -; ARM-LONG: ldr lr, [lr] -; ARM-LONG: blx lr +; ARM-LONG: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; ARM-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} +; ARM-LONG: ldr [[R]], {{\[}}[[R]]{{\]}} +; ARM-LONG: blx [[R]] ; THUMB: @t10 -; THUMB: movs r0, #0 -; THUMB: movt r0, #0 -; THUMB: movs r1, #248 -; THUMB: movt r1, #0 -; THUMB: movs r2, #187 -; THUMB: movt r2, #0 -; THUMB: movs r3, #28 -; THUMB: movt r3, #0 -; THUMB: movw r9, #40 -; THUMB: movt r9, #0 -; THUMB: movw r12, #186 -; THUMB: movt r12, #0 -; THUMB: uxtb r0, r0 -; THUMB: uxtb r1, r1 -; THUMB: uxtb r2, r2 -; THUMB: uxtb r3, r3 -; THUMB: uxtb.w r9, r9 -; THUMB: str.w r9, [sp] -; THUMB: uxtb.w r9, r12 -; THUMB: str.w r9, [sp, #4] -; THUMB: bl _bar +; THUMB: movs [[R0:l?r[0-9]*]], #0 +; THUMB: movt [[R0]], #0 +; THUMB: movs [[R1:l?r[0-9]*]], #248 +; THUMB: movt [[R1]], #0 +; THUMB: movs [[R2:l?r[0-9]*]], #187 +; THUMB: movt [[R2]], #0 +; THUMB: movs [[R3:l?r[0-9]*]], #28 +; THUMB: movt [[R3]], #0 +; THUMB: movw [[R4:l?r[0-9]*]], #40 +; THUMB: movt [[R4]], #0 +; THUMB: movw [[R5:l?r[0-9]*]], #186 +; THUMB: movt [[R5]], #0 +; THUMB: uxtb [[R0]], [[R0]] +; THUMB: uxtb [[R1]], [[R1]] +; THUMB: uxtb [[R2]], [[R2]] +; THUMB: uxtb [[R3]], [[R3]] +; THUMB: uxtb.w [[R4]], [[R4]] +; THUMB: str.w [[R4]], [sp] +; THUMB: uxtb.w [[R4]], [[R5]] +; THUMB: str.w [[R4]], [sp, #4] +; THUMB: bl {{_?}}bar ; THUMB-LONG: @t10 -; THUMB-LONG: movw lr, :lower16:L_bar$non_lazy_ptr -; THUMB-LONG: movt lr, :upper16:L_bar$non_lazy_ptr -; THUMB-LONG: ldr.w lr, [lr] -; THUMB-LONG: blx lr +; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} +; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}} +; THUMB-LONG: blx [[R]] %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 } @@ -147,12 +154,12 @@ define i32 @bar0(i32 %i) nounwind { define void @foo3() uwtable { ; ARM: movw r0, #0 -; ARM: movw r1, :lower16:_bar0 -; ARM: movt r1, :upper16:_bar0 +; ARM: {{(movw r1, :lower16:_?bar0)|(ldr r1, .LCPI)}} +; ARM: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}} ; ARM: blx r1 ; THUMB: movs r0, #0 -; THUMB: movw r1, :lower16:_bar0 -; THUMB: movt r1, :upper16:_bar0 +; THUMB: {{(movw r1, :lower16:_?bar0)|(ldr.n r1, .LCPI)}} +; THUMB: {{(movt r1, :upper16:_?bar0)|(ldr r1, \[r1\])}} ; THUMB: blx r1 %fptr = alloca i32 (i32)*, 
align 8 store i32 (i32)* @bar0, i32 (i32)** %fptr, align 8 @@ -164,66 +171,23 @@ define void @foo3() uwtable { define i32 @LibCall(i32 %a, i32 %b) { entry: ; ARM: LibCall -; ARM: bl ___udivsi3 +; ARM: bl {{___udivsi3|__aeabi_uidiv}} ; ARM-LONG: LibCall -; ARM-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr -; ARM-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; ARM-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr r2, .LCPI)}} +; ARM-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}} ; ARM-LONG: ldr r2, [r2] ; ARM-LONG: blx r2 ; THUMB: LibCall -; THUMB: bl ___udivsi3 +; THUMB: bl {{___udivsi3|__aeabi_uidiv}} ; THUMB-LONG: LibCall -; THUMB-LONG: movw r2, :lower16:L___udivsi3$non_lazy_ptr -; THUMB-LONG: movt r2, :upper16:L___udivsi3$non_lazy_ptr +; THUMB-LONG: {{(movw r2, :lower16:L___udivsi3\$non_lazy_ptr)|(ldr.n r2, .LCPI)}} +; THUMB-LONG: {{(movt r2, :upper16:L___udivsi3\$non_lazy_ptr)?}} ; THUMB-LONG: ldr r2, [r2] ; THUMB-LONG: blx r2 %tmp1 = udiv i32 %a, %b ; <i32> [#uses=1] ret i32 %tmp1 } -define i32 @VarArg() nounwind { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %m = alloca i32, align 4 - %n = alloca i32, align 4 - %tmp = alloca i32, align 4 - %0 = load i32* %i, align 4 - %1 = load i32* %j, align 4 - %2 = load i32* %k, align 4 - %3 = load i32* %m, align 4 - %4 = load i32* %n, align 4 -; ARM: VarArg -; ARM: mov r7, sp -; ARM: movw r0, #5 -; ARM: ldr r1, [r7, #-4] -; ARM: ldr r2, [r7, #-8] -; ARM: ldr r3, [r7, #-12] -; ARM: ldr r9, [sp, #16] -; ARM: ldr r12, [sp, #12] -; ARM: str r9, [sp] -; ARM: str r12, [sp, #4] -; ARM: bl _CallVariadic -; THUMB: mov r7, sp -; THUMB: movs r0, #5 -; THUMB: movt r0, #0 -; THUMB: ldr r1, [sp, #28] -; THUMB: ldr r2, [sp, #24] -; THUMB: ldr r3, [sp, #20] -; THUMB: ldr.w r9, [sp, #16] -; THUMB: ldr.w r12, [sp, #12] -; THUMB: str.w r9, [sp] -; THUMB: str.w r12, [sp, #4] -; THUMB: bl _CallVariadic - %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) - store i32 %call, i32* %tmp, align 4 - %5 = load i32* %tmp, align 4 - ret i32 %5 -} - -declare i32 @CallVariadic(i32, ...) 
- ; Test fastcc define fastcc void @fast_callee(float %i) ssp { diff --git a/test/CodeGen/ARM/fast-isel-cmp-imm.ll b/test/CodeGen/ARM/fast-isel-cmp-imm.ll index 660156aa48..45ef4ed75a 100644 --- a/test/CodeGen/ARM/fast-isel-cmp-imm.ll +++ b/test/CodeGen/ARM/fast-isel-cmp-imm.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define void @t1a(float %a) uwtable ssp { diff --git a/test/CodeGen/ARM/fast-isel-conversion.ll b/test/CodeGen/ARM/fast-isel-conversion.ll index 686ccad029..91034fb24f 100644 --- a/test/CodeGen/ARM/fast-isel-conversion.ll +++ b/test/CodeGen/ARM/fast-isel-conversion.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Test sitofp diff --git a/test/CodeGen/ARM/fast-isel-crash.ll b/test/CodeGen/ARM/fast-isel-crash.ll index 8fb4b66b7d..ec9cf8d950 100644 --- a/test/CodeGen/ARM/fast-isel-crash.ll +++ b/test/CodeGen/ARM/fast-isel-crash.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi %union.anon = type { <16 x i32> } diff --git a/test/CodeGen/ARM/fast-isel-crash2.ll b/test/CodeGen/ARM/fast-isel-crash2.ll index f245168a8e..d606877673 100644 --- a/test/CodeGen/ARM/fast-isel-crash2.ll +++ b/test/CodeGen/ARM/fast-isel-crash2.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi ; rdar://9515076 ; (Make sure this doesn't crash.) diff --git a/test/CodeGen/ARM/fast-isel-deadcode.ll b/test/CodeGen/ARM/fast-isel-deadcode.ll index 3a943d854b..5e6666c47d 100644 --- a/test/CodeGen/ARM/fast-isel-deadcode.ll +++ b/test/CodeGen/ARM/fast-isel-deadcode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -fast-isel-abort -verify-machineinstrs -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; Target-specific selector can't properly handle the double because it isn't ; being passed via a register, so the materialized arguments become dead code. 
diff --git a/test/CodeGen/ARM/fast-isel-fold.ll b/test/CodeGen/ARM/fast-isel-fold.ll index 7a65295f01..38e1f884bf 100644 --- a/test/CodeGen/ARM/fast-isel-fold.ll +++ b/test/CodeGen/ARM/fast-isel-fold.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB @a = global i8 1, align 1 diff --git a/test/CodeGen/ARM/fast-isel-frameaddr.ll b/test/CodeGen/ARM/fast-isel-frameaddr.ll index c256e73ab9..5ae7ad7f14 100644 --- a/test/CodeGen/ARM/fast-isel-frameaddr.ll +++ b/test/CodeGen/ARM/fast-isel-frameaddr.ll @@ -46,8 +46,7 @@ entry: ; LINUX-ARM: frameaddr_index1: ; LINUX-ARM: push {r11} ; LINUX-ARM: mov r11, sp -; LINUX-ARM: mov r0, r11 -; LINUX-ARM: ldr r0, [r0] +; LINUX-ARM: ldr r0, [r11] ; LINUX-THUMB2: frameaddr_index1: ; LINUX-THUMB2: str r7, [sp, #-4]! @@ -80,8 +79,7 @@ entry: ; LINUX-ARM: frameaddr_index3: ; LINUX-ARM: push {r11} ; LINUX-ARM: mov r11, sp -; LINUX-ARM: mov r0, r11 -; LINUX-ARM: ldr r0, [r0] +; LINUX-ARM: ldr r0, [r11] ; LINUX-ARM: ldr r0, [r0] ; LINUX-ARM: ldr r0, [r0] diff --git a/test/CodeGen/ARM/fast-isel-icmp.ll b/test/CodeGen/ARM/fast-isel-icmp.ll index 8357ed5c54..04a92825af 100644 --- a/test/CodeGen/ARM/fast-isel-icmp.ll +++ b/test/CodeGen/ARM/fast-isel-icmp.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @icmp_i16_signed(i16 %a, i16 %b) nounwind { diff --git a/test/CodeGen/ARM/fast-isel-indirectbr.ll b/test/CodeGen/ARM/fast-isel-indirectbr.ll index ebc0e8426d..2456ef4420 100644 --- a/test/CodeGen/ARM/fast-isel-indirectbr.ll +++ b/test/CodeGen/ARM/fast-isel-indirectbr.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define void @t1(i8* %x) { diff --git a/test/CodeGen/ARM/fast-isel-intrinsic.ll b/test/CodeGen/ARM/fast-isel-intrinsic.ll index 48105dd389..6b9e62b712 100644 --- a/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ b/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -1,38 +1,44 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic 
-mtriple=armv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi -arm-long-calls | FileCheck %s --check-prefix=ARM-LONG ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios -arm-long-calls | FileCheck %s --check-prefix=THUMB-LONG +; Note that some of these tests assume that relocations are either +; movw/movt or constant pool loads. Different platforms will select +; different approaches. + @message1 = global [60 x i8] c"The LLVM Compiler Infrastructure\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 1 @temp = common global [60 x i8] zeroinitializer, align 1 define void @t1() nounwind ssp { ; ARM: t1 -; ARM: movw r0, :lower16:_message1 -; ARM: movt r0, :upper16:_message1 +; ARM: {{(movw r0, :lower16:_?message1)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; ARM: add r0, r0, #5 ; ARM: movw r1, #64 ; ARM: movw r2, #10 ; ARM: uxtb r1, r1 -; ARM: bl _memset +; ARM: bl {{_?}}memset ; ARM-LONG: t1 -; ARM-LONG: movw r3, :lower16:L_memset$non_lazy_ptr -; ARM-LONG: movt r3, :upper16:L_memset$non_lazy_ptr +; ARM-LONG: {{(movw r3, :lower16:L_memset\$non_lazy_ptr)|(ldr r3, .LCPI)}} +; ARM-LONG: {{(movt r3, :upper16:L_memset\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 ; THUMB: t1 -; THUMB: movw r0, :lower16:_message1 -; THUMB: movt r0, :upper16:_message1 +; THUMB: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; THUMB: adds r0, #5 ; THUMB: movs r1, #64 ; THUMB: movt r1, #0 ; THUMB: movs r2, #10 ; THUMB: movt r2, #0 ; THUMB: uxtb r1, r1 -; THUMB: bl _memset +; THUMB: bl {{_?}}memset ; THUMB-LONG: t1 -; THUMB-LONG: movw r3, :lower16:L_memset$non_lazy_ptr -; THUMB-LONG: movt r3, :upper16:L_memset$non_lazy_ptr +; THUMB-LONG: {{(movw r0, :lower16:_?message1)|(ldr.n r0, .LCPI)}} +; THUMB-LONG: {{(movt r0, :upper16:_?message1)|(ldr r0, \[r0\])}} ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @message1, i32 0, i32 5), i8 64, i32 10, i32 4, i1 false) @@ -43,34 +49,36 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind define void @t2() nounwind ssp { ; ARM: t2 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 ; ARM: movw r2, #17 -; ARM: str r0, [sp] @ 4-byte Spill +; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; ARM: mov r0, r1 -; ARM: ldr r1, [sp] @ 4-byte Reload -; ARM: bl _memcpy +; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; ARM: bl {{_?}}memcpy ; ARM-LONG: t2 -; ARM-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr -; ARM-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr +; ARM-LONG: {{(movw r3, :lower16:L_memcpy\$non_lazy_ptr)|(ldr r3, .LCPI)}} +; ARM-LONG: {{(movt r3, :upper16:L_memcpy\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 ; THUMB: t2 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 ; THUMB: movs r2, 
#17 ; THUMB: movt r2, #0 +; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; THUMB: mov r0, r1 -; THUMB: bl _memcpy +; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: bl {{_?}}memcpy ; THUMB-LONG: t2 -; THUMB-LONG: movw r3, :lower16:L_memcpy$non_lazy_ptr -; THUMB-LONG: movt r3, :upper16:L_memcpy$non_lazy_ptr +; THUMB-LONG: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB-LONG: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 17, i32 4, i1 false) @@ -81,32 +89,34 @@ declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t3() nounwind ssp { ; ARM: t3 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: add r1, r0, #4 ; ARM: add r0, r0, #16 ; ARM: movw r2, #10 ; ARM: mov r0, r1 -; ARM: bl _memmove +; ARM: bl {{_?}}memmove ; ARM-LONG: t3 -; ARM-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr -; ARM-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr +; ARM-LONG: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM-LONG: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM-LONG: ldr r3, [r3] ; ARM-LONG: blx r3 ; THUMB: t3 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: adds r1, r0, #4 ; THUMB: adds r0, #16 ; THUMB: movs r2, #10 ; THUMB: movt r2, #0 +; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill ; THUMB: mov r0, r1 -; THUMB: bl _memmove +; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: bl {{_?}}memmove ; THUMB-LONG: t3 -; THUMB-LONG: movw r3, :lower16:L_memmove$non_lazy_ptr -; THUMB-LONG: movt r3, :upper16:L_memmove$non_lazy_ptr +; THUMB-LONG: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB-LONG: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB-LONG: ldr r3, [r3] ; THUMB-LONG: blx r3 call void @llvm.memmove.p0i8.p0i8.i32(i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 4), i8* getelementptr inbounds ([60 x i8]* @temp, i32 0, i32 16), i32 10, i32 1, i1 false) @@ -115,8 +125,8 @@ define void @t3() nounwind ssp { define void @t4() nounwind ssp { ; ARM: t4 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldr r1, [r0, #16] ; ARM: str r1, [r0, #4] @@ -126,8 +136,8 @@ define void @t4() nounwind ssp { ; ARM: strh r1, [r0, #12] ; ARM: bx lr ; THUMB: t4 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldr r1, [r0, #16] ; THUMB: str r1, [r0, #4] @@ -144,8 +154,8 @@ declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, define void @t5() nounwind ssp { ; ARM: t5 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: 
{{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldrh r1, [r0, #16] ; ARM: strh r1, [r0, #4] @@ -159,8 +169,8 @@ define void @t5() nounwind ssp { ; ARM: strh r1, [r0, #12] ; ARM: bx lr ; THUMB: t5 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldrh r1, [r0, #16] ; THUMB: strh r1, [r0, #4] @@ -179,8 +189,8 @@ define void @t5() nounwind ssp { define void @t6() nounwind ssp { ; ARM: t6 -; ARM: movw r0, :lower16:L_temp$non_lazy_ptr -; ARM: movt r0, :upper16:L_temp$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldrb r1, [r0, #16] ; ARM: strb r1, [r0, #4] @@ -204,8 +214,8 @@ define void @t6() nounwind ssp { ; ARM: strb r1, [r0, #13] ; ARM: bx lr ; THUMB: t6 -; THUMB: movw r0, :lower16:L_temp$non_lazy_ptr -; THUMB: movt r0, :upper16:L_temp$non_lazy_ptr +; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldrb r1, [r0, #16] ; THUMB: strb r1, [r0, #4] diff --git a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll index dfb8c53735..cf294bcfbe 100644 --- a/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll +++ b/test/CodeGen/ARM/fast-isel-ldr-str-arm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=ARM define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: diff --git a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll index 0b5267ddc9..c05ea398d7 100644 --- a/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll +++ b/test/CodeGen/ARM/fast-isel-ldrh-strh-arm.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; rdar://10418009 define zeroext i16 @t1(i16* nocapture %a) nounwind uwtable readonly ssp { diff --git a/test/CodeGen/ARM/fast-isel-mvn.ll b/test/CodeGen/ARM/fast-isel-mvn.ll index b180e439dd..0bc9395e2d 100644 --- a/test/CodeGen/ARM/fast-isel-mvn.ll +++ b/test/CodeGen/ARM/fast-isel-mvn.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM 
+; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB ; rdar://10412592 ; Note: The Thumb code is being generated by the target-independent selector. diff --git a/test/CodeGen/ARM/fast-isel-pic.ll b/test/CodeGen/ARM/fast-isel-pic.ll index 867d53f973..6bb9ea3a8c 100644 --- a/test/CodeGen/ARM/fast-isel-pic.ll +++ b/test/CodeGen/ARM/fast-isel-pic.ll @@ -1,7 +1,7 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=arm-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARMv7 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=pic -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s --check-prefix=THUMB-ELF ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=pic -mtriple=armv7-none-linux-gnueabi | FileCheck %s --check-prefix=ARMv7-ELF @g = global i32 0, align 4 diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll index 27731def1f..48f93225b6 100644 --- a/test/CodeGen/ARM/fast-isel-pred.ll +++ b/test/CodeGen/ARM/fast-isel-pred.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-apple-darwin < %s +; RUN: llc -O0 -verify-machineinstrs -mtriple=armv7-linux-gnueabi < %s define i32 @main() nounwind ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll index 563880dab0..ee150facac 100644 --- a/test/CodeGen/ARM/fast-isel-redefinition.ll +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -verify-machineinstrs -optimize-regalloc -regalloc=basic < %s +; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -optimize-regalloc -regalloc=basic < %s ; This isn't exactly a useful set of command-line options, but check that it ; doesn't crash. (It was crashing because a register was getting redefined.) 
diff --git a/test/CodeGen/ARM/fast-isel-ret.ll b/test/CodeGen/ARM/fast-isel-ret.ll index 689b169ee3..a7d271a94c 100644 --- a/test/CodeGen/ARM/fast-isel-ret.ll +++ b/test/CodeGen/ARM/fast-isel-ret.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s ; Sign-extend of i1 currently not supported by fast-isel diff --git a/test/CodeGen/ARM/fast-isel-select.ll b/test/CodeGen/ARM/fast-isel-select.ll index b83a733669..bb8881430c 100644 --- a/test/CodeGen/ARM/fast-isel-select.ll +++ b/test/CodeGen/ARM/fast-isel-select.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB define i32 @t1(i1 %c) nounwind readnone { diff --git a/test/CodeGen/ARM/fast-isel-shifter.ll b/test/CodeGen/ARM/fast-isel-shifter.ll index 111818b289..dbb1ce231a 100644 --- a/test/CodeGen/ARM/fast-isel-shifter.ll +++ b/test/CodeGen/ARM/fast-isel-shifter.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-linux-gnueabi | FileCheck %s --check-prefix=ARM define i32 @shl() nounwind ssp { entry: diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index e8759a7fc4..7d86cb9b69 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -relocation-model=static | FileCheck -check-prefix=NORM %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static -arm-long-calls | FileCheck -check-prefix=LONG %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=NORM %s +; RUN: llc < %s -mtriple=thumbv7-linux-gnueabi -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=static | FileCheck -check-prefix=NORM %s define void @myadd(float* %sum, float* %addend) nounwind { entry: @@ -24,7 +26,7 @@ entry: store float 0.000000e+00, float* %ztot, align 4 store float 1.000000e+00, float* %z, align 4 ; CHECK-LONG: blx r -; CHECK-NORM: bl _myadd +; CHECK-NORM: bl {{_?}}myadd call void @myadd(float* %ztot, float* %z) ret i32 0 } diff --git a/test/CodeGen/ARM/fast-isel-vararg.ll b/test/CodeGen/ARM/fast-isel-vararg.ll new file mode 100644 index 
0000000000..e1baa012df --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-vararg.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-IOS +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB + +define i32 @VarArg() nounwind { +entry: + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %k = alloca i32, align 4 + %m = alloca i32, align 4 + %n = alloca i32, align 4 + %tmp = alloca i32, align 4 + %0 = load i32* %i, align 4 + %1 = load i32* %j, align 4 + %2 = load i32* %k, align 4 + %3 = load i32* %m, align 4 + %4 = load i32* %n, align 4 +; ARM-IOS: VarArg +; ARM-IOS: mov r7, sp +; ARM-IOS: sub sp, sp, #32 +; ARM-IOS: movw r0, #5 +; ARM-IOS: ldr r1, [r7, #-4] +; ARM-IOS: ldr r2, [r7, #-8] +; ARM-IOS: ldr r3, [r7, #-12] +; ARM-IOS: ldr r9, [sp, #16] +; ARM-IOS: ldr r12, [sp, #12] +; ARM-IOS: str r9, [sp] +; ARM-IOS: str r12, [sp, #4] +; ARM-IOS: bl _CallVariadic +; THUMB: sub sp, #32 +; THUMB: movs r0, #5 +; THUMB: movt r0, #0 +; THUMB: ldr r1, [sp, #28] +; THUMB: ldr r2, [sp, #24] +; THUMB: ldr r3, [sp, #20] +; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16] +; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #12] +; THUMB: str.w {{[a-z0-9]+}}, [sp] +; THUMB: str.w {{[a-z0-9]+}}, [sp, #4] +; THUMB: bl {{_?}}CallVariadic + %call = call i32 (i32, ...)* @CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) + store i32 %call, i32* %tmp, align 4 + %5 = load i32* %tmp, align 4 + ret i32 %5 +} + +declare i32 @CallVariadic(i32, ...) diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index 41fda41326..c4274c5eb5 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,7 +1,5 @@ ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM ; RUN: llc < %s -O0 -fast-isel-abort -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=armv7-apple-ios | FileCheck %s --check-prefix=ARM-STRICT-ALIGN -; RUN: llc < %s -O0 -arm-strict-align -relocation-model=dynamic-no-pic -mtriple=thumbv7-apple-ios | FileCheck %s --check-prefix=THUMB-STRICT-ALIGN ; Very basic fast-isel functionality. define i32 @add(i32 %a, i32 %b) nounwind { @@ -144,82 +142,25 @@ define void @test4() { store i32 %b, i32* @test4g ret void -; THUMB: movw r0, :lower16:L_test4g$non_lazy_ptr -; THUMB: movt r0, :upper16:L_test4g$non_lazy_ptr + +; Note that relocations are either movw/movt or constant pool +; loads. Different platforms will select different approaches. 
+ +; THUMB: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} +; THUMB: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}} ; THUMB: ldr r0, [r0] ; THUMB: ldr r1, [r0] ; THUMB: adds r1, #1 ; THUMB: str r1, [r0] -; ARM: movw r0, :lower16:L_test4g$non_lazy_ptr -; ARM: movt r0, :upper16:L_test4g$non_lazy_ptr +; ARM: {{(movw r0, :lower16:L_test4g\$non_lazy_ptr)|(ldr r0, .LCPI)}} +; ARM: {{(movt r0, :upper16:L_test4g\$non_lazy_ptr)?}} ; ARM: ldr r0, [r0] ; ARM: ldr r1, [r0] ; ARM: add r1, r1, #1 ; ARM: str r1, [r0] } -; Check unaligned stores -%struct.anon = type <{ float }> - -@a = common global %struct.anon* null, align 4 - -define void @unaligned_store(float %x, float %y) nounwind { -entry: -; ARM: @unaligned_store -; ARM: vmov r1, s0 -; ARM: str r1, [r0] - -; THUMB: @unaligned_store -; THUMB: vmov r1, s0 -; THUMB: str r1, [r0] - - %add = fadd float %x, %y - %0 = load %struct.anon** @a, align 4 - %x1 = getelementptr inbounds %struct.anon* %0, i32 0, i32 0 - store float %add, float* %x1, align 1 - ret void -} - -; Doublewords require only word-alignment. -; rdar://10528060 -%struct.anon.0 = type { double } - -@foo_unpacked = common global %struct.anon.0 zeroinitializer, align 4 - -define void @test5(double %a, double %b) nounwind { -entry: -; ARM: @test5 -; THUMB: @test5 - %add = fadd double %a, %b - store double %add, double* getelementptr inbounds (%struct.anon.0* @foo_unpacked, i32 0, i32 0), align 4 -; ARM: vstr d16, [r0] -; THUMB: vstr d16, [r0] - ret void -} - -; Check unaligned loads of floats -%class.TAlignTest = type <{ i16, float }> - -define zeroext i1 @test6(%class.TAlignTest* %this) nounwind align 2 { -entry: -; ARM: @test6 -; THUMB: @test6 - %0 = alloca %class.TAlignTest*, align 4 - store %class.TAlignTest* %this, %class.TAlignTest** %0, align 4 - %1 = load %class.TAlignTest** %0 - %2 = getelementptr inbounds %class.TAlignTest* %1, i32 0, i32 1 - %3 = load float* %2, align 1 - %4 = fcmp une float %3, 0.000000e+00 -; ARM: ldr r0, [r0, #2] -; ARM: vmov s0, r0 -; ARM: vcmpe.f32 s0, #0 -; THUMB: ldr.w r0, [r0, #2] -; THUMB: vmov s0, r0 -; THUMB: vcmpe.f32 s0, #0 - ret i1 %4 -} - ; ARM: @urem_fold ; THUMB: @urem_fold ; ARM: and r0, r0, #31 @@ -229,10 +170,10 @@ define i32 @urem_fold(i32 %a) nounwind { ret i32 %rem } -define i32 @test7() noreturn nounwind { +define i32 @trap_intrinsic() noreturn nounwind { entry: -; ARM: @test7 -; THUMB: @test7 +; ARM: @trap_intrinsic +; THUMB: @trap_intrinsic ; ARM: trap ; THUMB: trap tail call void @llvm.trap( ) @@ -240,67 +181,3 @@ entry: } declare void @llvm.trap() nounwind - -define void @unaligned_i16_store(i16 %x, i16* %y) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i16_store -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb - -; THUMB-STRICT-ALIGN: @unaligned_i16_store -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb - - store i16 %x, i16* %y, align 1 - ret void -} - -define i16 @unaligned_i16_load(i16* %x) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i16_load -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb - -; THUMB-STRICT-ALIGN: @unaligned_i16_load -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb - - %0 = load i16* %x, align 1 - ret i16 %0 -} - -define void @unaligned_i32_store(i32 %x, i32* %y) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i32_store -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb -; ARM-STRICT-ALIGN: strb - -; THUMB-STRICT-ALIGN: @unaligned_i32_store -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb -; THUMB-STRICT-ALIGN: strb -; 
THUMB-STRICT-ALIGN: strb - - store i32 %x, i32* %y, align 1 - ret void -} - -define i32 @unaligned_i32_load(i32* %x) nounwind { -entry: -; ARM-STRICT-ALIGN: @unaligned_i32_load -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb -; ARM-STRICT-ALIGN: ldrb - -; THUMB-STRICT-ALIGN: @unaligned_i32_load -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb -; THUMB-STRICT-ALIGN: ldrb - - %0 = load i32* %x, align 1 - ret i32 %0 -} diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index 9c8ff2b409..8b77301273 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -2,6 +2,13 @@ ; RUN: llc < %s -march=thumb -mattr=+v7 | FileCheck %s -check-prefix=THUMB2 ; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s -check-prefix=ARM ; RUN: llc < %s -march=arm -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP +; @LOCALMOD-START +; TODO(jfb) Use -mcpu=cortex-a9-mp here, currently disabled because +; llvm-objdump doesn't properly disassemble pldw. binutils' +; objdump disassembles the instruction just fine. +; RUN: llc < %s -mcpu=cortex-a9 -mtriple=armv7-unknown-nacl -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s -check-prefix=ARM-NACL +; @LOCALMOD-END ; rdar://8601536 define void @t1(i8* %ptr) nounwind { @@ -17,6 +24,15 @@ entry: ; THUMB2: t1: ; THUMB2-NOT: pldw [r0] ; THUMB2: pld [r0] + +; @LOCALMOD-START +; TODO(jfb) This pldw doesn't llvm-objdump properlu, fix this when the +; above-mentioned bug is fixed. +; ARM-NACL-DISABLED-TODO-REENABLE: bic r0, r0, #3221225472 +; ARM-NACL-DISABLED-TODO-REENABLE: pldw [r0] +; ARM-NACL: bic r0, r0, #3221225472 +; ARM-NACL: pld [r0] +; @LOCALMOD-END tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) ret void @@ -29,6 +45,11 @@ entry: ; THUMB2: t2: ; THUMB2: pld [r0, #1023] + +; @LOCALMOD-START +; ARM-NACL: bic r0, r0, #3221225472 +; ARM-NACL: pld [r0, #1023] +; @LOCALMOD-END %tmp = getelementptr i8* %ptr, i32 1023 tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 ) ret void @@ -42,6 +63,11 @@ entry: ; THUMB2: t3: ; THUMB2: lsrs r1, r1, #2 ; THUMB2: pld [r0, r1] + +; @LOCALMOD-START +; ARM-NACL: bic r0, r0, #3221225472 +; ARM-NACL: pld [r0] +; @LOCALMOD-END %tmp1 = lshr i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i8* @@ -56,6 +82,11 @@ entry: ; THUMB2: t4: ; THUMB2: pld [r0, r1, lsl #2] + +; @LOCALMOD-START +; ARM-NACL: bic r0, r0, #3221225472 +; ARM-NACL: pld [r0] +; @LOCALMOD-END %tmp1 = shl i32 %offset, 2 %tmp2 = add i32 %base, %tmp1 %tmp3 = inttoptr i32 %tmp2 to i8* @@ -72,6 +103,11 @@ entry: ; THUMB2: t5: ; THUMB2: pli [r0] + +; @LOCALMOD-START +; ARM-NACL: bic r0, r0, #3221225472 +; ARM-NACL: pli [r0] +; @LOCALMOD-END tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 0 ) ret void } diff --git a/test/CodeGen/ARM/unwind-init.ll b/test/CodeGen/ARM/unwind-init.ll new file mode 100644 index 0000000000..11683d5605 --- /dev/null +++ b/test/CodeGen/ARM/unwind-init.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=armv7-unknown-linux-gnueabi < %s | FileCheck %s +; Check that all callee-saved registers are saved and restored in functions +; that call __builtin_unwind_init(). This is its undocumented behavior in gcc, +; and it is used in compiling libgcc_eh. 
+; See also PR8541 + +declare void @llvm.eh.unwind.init() + +define void @calls_unwind_init() { + call void @llvm.eh.unwind.init() + ret void +} + +; CHECK: calls_unwind_init: +; CHECK: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} diff --git a/test/CodeGen/Thumb2/large-call.ll b/test/CodeGen/Thumb2/large-call.ll index 61c477aa91..1b4d4625dd 100644 --- a/test/CodeGen/Thumb2/large-call.ll +++ b/test/CodeGen/Thumb2/large-call.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mcpu=cortex-a8 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios0.0.0" diff --git a/test/CodeGen/X86/StackColoring.ll b/test/CodeGen/X86/StackColoring.ll index f8ae74f292..6c0f00d17d 100644 --- a/test/CodeGen/X86/StackColoring.ll +++ b/test/CodeGen/X86/StackColoring.ll @@ -350,6 +350,28 @@ bb3: ret i32 0 } + +; Regression test for PR15707. %buf1 and %buf2 should not be merged +; in this test case. +;YESCOLOR: myCall_pr15707 +;YESCOLOR: subq $200008, %rsp +;NOCOLOR: myCall_pr15707 +;NOCOLOR: subq $200008, %rsp +define void @myCall_pr15707() { + %buf1 = alloca i8, i32 100000, align 16 + %buf2 = alloca i8, i32 100000, align 16 + + call void @llvm.lifetime.start(i64 -1, i8* %buf1) + call void @llvm.lifetime.end(i64 -1, i8* %buf1) + + call void @llvm.lifetime.start(i64 -1, i8* %buf1) + call void @llvm.lifetime.start(i64 -1, i8* %buf2) + %result1 = call i32 @foo(i32 0, i8* %buf1) + %result2 = call i32 @foo(i32 0, i8* %buf2) + ret void +} + + ; Check that we don't assert and crash even when there are allocas ; outside the declared lifetime regions. ;YESCOLOR: bad_range diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index f0375f8602..7cc5903202 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -mtriple=x86_64-linux -O0 | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -mtriple=x86_64-win32 -O0 | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i686-nacl -march=x86 -O0 | FileCheck %s --check-prefix=NACL32 ; GEP indices are interpreted as signed integers, so they ; should be sign-extended to 64 bits on 64-bit targets. 
@@ -18,6 +19,11 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind { ; X64: movl (%r[[A1:si|dx]],%rax,4), %eax ; X64: ret +; NACL32: test1: +; NACL32: movl (%e{{.*}},%e{{.*}},4), %eax +; NACL32: popl %ecx +; NACL32: nacljmp %ecx + } define i32 @test2(i64 %t3, i32* %t1) nounwind { %t9 = getelementptr i32* %t1, i64 %t3 ; <i32*> [#uses=1] @@ -30,6 +36,10 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind { ; X64: test2: ; X64: movl (%r[[A1]],%r[[A0]],4), %eax ; X64: ret + +; NACL32: test2: +; NACL32: movl (%e{{.*}},%e{{.*}},4), %e + } @@ -51,6 +61,10 @@ entry: ; X64: movb -2(%r[[A0]]), %al ; X64: ret +; NACL32: test3: +; NACL32: movl 4(%esp), %[[REG:e..]] +; NACL32: movb -2(%{{.*}}[[REG]]), %al + } define double @test4(i64 %x, double* %p) nounwind { @@ -70,6 +84,8 @@ entry: ; X32: 128(%e{{.*}},%e{{.*}},8) ; X64: test4: ; X64: 128(%r{{.*}},%r{{.*}},8) +; NACL32: test4: +; NACL32: 128(%e{{.*}},%e{{.*}},8) } ; PR8961 - Make sure the sext for the GEP addressing comes before the load that diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index ad1520ef81..acfa64582c 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mattr=-avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s ; RUN: llc < %s -mattr=+avx -fast-isel -mcpu=core2 -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort -mtriple=x86_64-none-nacl | FileCheck %s --check-prefix=NACL64 +; RUN: llc < %s -fast-isel -O0 -regalloc=fast -asm-verbose=0 -fast-isel-abort -mtriple=x86_64-none-nacl -relocation-model=pic | FileCheck %s --check-prefix=NACL64_PIC target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" @@ -46,6 +48,16 @@ define i64 @test3() nounwind { ; CHECK-NEXT: ret } +; NACL64 version uses i32 for 32-bit pointers. +define i32 @test3_nacl64() nounwind { + %A = ptrtoint i32* @G to i32 + ret i32 %A + +; NACL64_PIC: test3_nacl64: +; NACL64_PIC: movl G@GOTPCREL(%rip), %eax +; NACL64_PIC-NEXT: popq %rcx +; NACL64_PIC-NEXT: nacljmp %ecx, %r15 +} ; rdar://9289558 @@ -301,6 +313,11 @@ define void @test23(i8* noalias sret %result) { ; CHECK: call ; CHECK: movq %rdi, %rax ; CHECK: ret +; NACL64: test23: +; NACL64: call +; NACL64: movl %edi, %eax +; NACL64: popq %rcx +; NACL64: nacljmp %ecx, %r15 } declare i8* @foo23() diff --git a/test/CodeGen/X86/unwind-init.ll b/test/CodeGen/X86/unwind-init.ll new file mode 100644 index 0000000000..e34178d872 --- /dev/null +++ b/test/CodeGen/X86/unwind-init.ll @@ -0,0 +1,36 @@ +; RUN: llc -mtriple=x86_64-unknown-linux < %s | FileCheck -check-prefix X8664 %s +; RUN: llc -mtriple=i686-unknown-linux < %s | FileCheck -check-prefix X8632 %s +; Check that all callee-saved registers are saved and restored in functions +; that call __builtin_unwind_init(). This is its undocumented behavior in gcc, +; and it is used in compiling libgcc_eh. 
+; See also PR8541 + +declare void @llvm.eh.unwind.init() + +define void @calls_unwind_init() { + call void @llvm.eh.unwind.init() + ret void +} + +; X8664: calls_unwind_init: +; X8664: pushq %rbp +; X8664: pushq %r15 +; X8664: pushq %r14 +; X8664: pushq %r13 +; X8664: pushq %r12 +; X8664: pushq %rbx +; X8664: popq %rbx +; X8664: popq %r12 +; X8664: popq %r13 +; X8664: popq %r14 +; X8664: popq %r15 + +; X8632: calls_unwind_init: +; X8632: pushl %ebp +; X8632: pushl %ebx +; X8632: pushl %edi +; X8632: pushl %esi +; X8632: popl %esi +; X8632: popl %edi +; X8632: popl %ebx +; X8632: popl %ebp diff --git a/test/MC/ARM/data-in-code.ll b/test/MC/ARM/data-in-code.ll index e3325b6bf6..53c46c6755 100644 --- a/test/MC/ARM/data-in-code.ll +++ b/test/MC/ARM/data-in-code.ll @@ -1,7 +1,7 @@ -;; RUN: llc -O0 -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \ +;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -mtriple=armv7-linux-gnueabi -filetype=obj %s -o - | \ ;; RUN: llvm-readobj -t | FileCheck -check-prefix=ARM %s -;; RUN: llc -O0 -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \ +;; RUN: llc -O0 -verify-machineinstrs -fast-isel-abort -mtriple=thumbv7-linux-gnueabi -filetype=obj %s -o - | \ ;; RUN: llvm-readobj -t | FileCheck -check-prefix=TMB %s ;; Ensure that if a jump table is generated that it has Mapping Symbols diff --git a/test/MC/ARM/elf-note-nacl.ll b/test/MC/ARM/elf-note-nacl.ll new file mode 100644 index 0000000000..8551cf52b4 --- /dev/null +++ b/test/MC/ARM/elf-note-nacl.ll @@ -0,0 +1,22 @@ +; RUN: llc -filetype=obj -mtriple armv7-none-nacl-gnueabi %s -o - \ +; RUN: | llvm-objdump -triple armv7 -s - | FileCheck %s + +; Tests that NaCl object files contain an ELF note section that identifies them +; to the binutils gold linker + +define void @main() { + ret void +} + +; There appears to be no way for llvm-objdump to show flags for sections, or +; to dump groups like readelf. +; CHECK: .group +; CHECK: .note.NaCl.ABI.arm +; The contents of the words in the note section should be: +; sizeof "NaCl" +; sizeof "arm" +; 1 (NT_VERSION) +; "NaCl" with nul termination and padding to align 4 +; "arm" with nul termination and padding to align 4 +; CHECK-NEXT: 0000 05000000 04000000 01000000 4e61436c +; CHECK-NEXT: 0010 00000000 61726d00 diff --git a/test/MC/MachO/ARM/nop-armv4-padding.s b/test/MC/MachO/ARM/nop-armv4-padding.s index 8e03d17a70..f704d801a6 100644 --- a/test/MC/MachO/ARM/nop-armv4-padding.s +++ b/test/MC/MachO/ARM/nop-armv4-padding.s @@ -7,4 +7,5 @@ x: .align 4 add r0, r1, r2 +; @LOCALMOD - Changed the NOP encoding @ CHECK: ('_section_data', '020081e0 0000a0e1 0000a0e1 0000a0e1 020081e0') diff --git a/test/MC/Mips/elf-note-nacl.ll b/test/MC/Mips/elf-note-nacl.ll new file mode 100644 index 0000000000..0361eff25e --- /dev/null +++ b/test/MC/Mips/elf-note-nacl.ll @@ -0,0 +1,22 @@ +; RUN: llc -filetype=obj -mtriple mipsel-none-nacl %s -o - \ +; RUN: | llvm-objdump -triple mipsel -s - | FileCheck %s + +; Tests that NaCl object files contain an ELF note section that identifies them +; to the binutils gold linker + +define void @main() { + ret void +} + +; There appears to be no way for llvm-objdump to show flags for sections, or +; to dump groups like readelf. 
+; CHECK: .group
+; CHECK: .note.NaCl.ABI.mipsel
+; The contents of the words in the note section should be:
+; sizeof "NaCl"
+; sizeof "mipsel"
+; 1 (NT_VERSION)
+; "NaCl" with nul termination and padding to align 4
+; "mipsel" with nul termination and padding to align 4
+; CHECK-NEXT: 0000 05000000 07000000 01000000 4e61436c
+; CHECK-NEXT: 0010 00000000 6d697073 656c0000
diff --git a/test/MC/X86/elf-note-nacl.ll b/test/MC/X86/elf-note-nacl.ll
new file mode 100644
index 0000000000..ce15455c84
--- /dev/null
+++ b/test/MC/X86/elf-note-nacl.ll
@@ -0,0 +1,36 @@
+; RUN: llc -filetype=obj -mtriple i686-none-nacl %s -o - \
+; RUN: | llvm-objdump -triple i686 -s - | FileCheck --check-prefix=I386 %s
+
+; RUN: llc -filetype=obj -mtriple x86_64-none-nacl %s -o - \
+; RUN: | llvm-objdump -triple x86_64 -s - | FileCheck --check-prefix=X8664 %s
+
+; Tests that NaCl object files contain an ELF note section that identifies them
+; to the binutils gold linker
+
+define void @main() {
+ ret void
+}
+
+; There appears to be no way for llvm-objdump to show flags for sections, or
+; to dump groups like readelf.
+; I386: .group
+; I386: .note.NaCl.ABI.x86-32
+; The contents of the words in the note section should be:
+; sizeof "NaCl"
+; sizeof "x86-32"
+; 1 (NT_VERSION)
+; "NaCl" with nul termination and padding to align 4
+; "x86-32" with nul termination and padding to align 4
+; I386-NEXT: 0000 05000000 07000000 01000000 4e61436c
+; I386-NEXT: 0010 00000000 7838362d 33320000
+
+; X8664: .group
+; X8664: .note.NaCl.ABI.x86-64
+; The contents of the words in the note section should be:
+; sizeof "NaCl"
+; sizeof "x86-64"
+; 1 (NT_VERSION)
+; "NaCl" with nul termination and padding to align 4
+; "x86-64" with nul termination and padding to align 4
+; X8664-NEXT: 0000 05000000 07000000 01000000 4e61436c
+; X8664-NEXT: 0010 00000000 7838362d 36340000
diff --git a/test/NaCl/ARM/blx-sandboxing.ll b/test/NaCl/ARM/blx-sandboxing.ll
new file mode 100644
index 0000000000..f7f154a82f
--- /dev/null
+++ b/test/NaCl/ARM/blx-sandboxing.ll
@@ -0,0 +1,17 @@
+; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-branch -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+define i32 @foobar(i32 %aa, i32 %bb, i32 (i32)* %f) nounwind {
+entry:
+ %aa.addr = alloca i32, align 4
+ %bb.addr = alloca i32, align 4
+ %f.addr = alloca i32 (i32)*, align 8
+ %0 = load i32 (i32)** %f.addr, align 8
+ %1 = load i32* %aa.addr, align 4
+ %call1 = call i32 %0(i32 %1)
+; CHECK: bic r1, r1, #3221225487
+; CHECK-NEXT: blx r1
+ ret i32 %call1
+}
+
+
diff --git a/test/NaCl/ARM/bx-sandboxing.ll b/test/NaCl/ARM/bx-sandboxing.ll
new file mode 100644
index 0000000000..199f5a418d
--- /dev/null
+++ b/test/NaCl/ARM/bx-sandboxing.ll
@@ -0,0 +1,10 @@
+; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-branch -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+define i32 @foo() nounwind {
+entry:
+ ret i32 42
+; CHECK: bic lr, lr, #3221225487
+; CHECK-NEXT: bx lr
+}
+
diff --git a/test/NaCl/ARM/call-return-sandboxing1.ll b/test/NaCl/ARM/call-return-sandboxing1.ll
new file mode 100644
index 0000000000..06207075f5
--- /dev/null
+++ b/test/NaCl/ARM/call-return-sandboxing1.ll
@@ -0,0 +1,71 @@
+; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-branch -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s
+
+define i32 @foo(i32 %aa, i32 %bb) nounwind {
+entry:
+ %aa.addr = alloca i32, align 4
+ %bb.addr = alloca i32, align 4
+ %cc = alloca i32, align 4
+ %dd = alloca i32, align 4
+ store i32 %aa, i32* %aa.addr, align 4
+ store i32 %bb, i32* %bb.addr, align 4
+ %0 = load i32* %aa.addr, align 4
+ %1 = load i32* %bb.addr, align 4
+ %mul = mul nsw i32 %0, %1
+ store i32 %mul, i32* %cc, align 4
+ %2 = load i32* %aa.addr, align 4
+ %mul1 = mul nsw i32 %2, 17
+ %3 = load i32* %cc, align 4
+ %sub = sub nsw i32 %mul1, %3
+ store i32 %sub, i32* %dd, align 4
+ %4 = load i32* %dd, align 4
+ ret i32 %4
+
+; This checks two things:
+; 1. bx lr is sandboxed by prepending a bic
+; 2. The bic/bx pair doesn't straddle a 16-byte bundle boundary, hence the nop
+; CHECK: nop
+; CHECK-NEXT: bic lr, lr, #3221225487
+; CHECK-NEXT: bx lr
+
+}
+
+define i32 @bar(i32 %aa, i32 %bb) nounwind {
+entry:
+
+; Check that the function start is padded with nops to start at a bundle
+; boundary
+; CHECK: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: push
+
+ %aa.addr = alloca i32, align 4
+ %bb.addr = alloca i32, align 4
+ store i32 %aa, i32* %aa.addr, align 4
+ store i32 %bb, i32* %bb.addr, align 4
+ %0 = load i32* %aa.addr, align 4
+ %mul = mul nsw i32 %0, 19
+ %call = call i32 @foo(i32 %mul, i32 7)
+
+; Check that the call is padded to be at the end of a bundle
+; CHECK: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bl
+
+ %1 = load i32* %bb.addr, align 4
+ %mul1 = mul nsw i32 %1, 31
+ %2 = load i32* %bb.addr, align 4
+ %div = sdiv i32 %2, 7
+ %add = add nsw i32 %div, 191
+ %call2 = call i32 @foo(i32 %mul1, i32 %add)
+
+; Check that the call is padded to be at the end of a bundle
+; CHECK: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bl
+
+ %add3 = add nsw i32 %call, %call2
+ ret i32 %add3
+}
+
diff --git a/test/NaCl/ARM/divrem-guards-complex.ll b/test/NaCl/ARM/divrem-guards-complex.ll
new file mode 100644
index 0000000000..ba66ab4b31
--- /dev/null
+++ b/test/NaCl/ARM/divrem-guards-complex.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -insert-divide-check -S | FileCheck -check-prefix=OPT %s
+
+declare void @foo()
+
+; Check for multiple divs that occur in one block.
+define i32 @twodivs_one_block(i32 %x, i32 %y) #0 {
+entry:
+ call void @foo()
+ br label %divblock
+divblock:
+ %div1 = sdiv i32 %x, %y
+ %div2 = sdiv i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: sdiv
+; OPT: %1 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %1, label %divrem.by.zero1, label %guarded.divrem2
+; OPT: guarded.divrem2:
+; OPT-NEXT: sdiv
+; OPT-NEXT: add
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+; OPT: divrem.by.zero1:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+ %sum = add i32 %div1, %div2
+ ret i32 %sum
+}
+
+define i32 @twodivs_three_blocks(i32 %x, i32 %y) #0 {
+entry:
+ call void @foo()
+ br label %divblock
+divblock:
+ %div1 = sdiv i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: sdiv
+; OPT-NEXT: br label %exitblock
+ br label %exitblock
+exitblock:
+ call void @foo()
+ %div2 = sdiv i32 %x, %y
+; OPT: %1 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %1, label %divrem.by.zero1, label %guarded.divrem2
+; OPT: guarded.divrem2:
+; OPT-NEXT: sdiv
+; OPT-NEXT: add
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+; OPT: divrem.by.zero1:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+ %sum = add i32 %div1, %div2
+ ret i32 %sum
+}
+
+; Check for divs that occur in blocks with multiple predecessors.
+define i32 @onediv_two_predecessors(i32 %x, i32 %y) #0 {
+entry:
+ call void @foo()
+ br label %divblock
+divblock:
+ %x1 = phi i32 [%x, %entry], [%x2, %divblock]
+ %div1 = sdiv i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: sdiv
+; OPT-NEXT: sub
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+ %x2 = sub i32 %x1, 1
+ %p = icmp ne i32 %x2, 0
+ br i1 %p, label %divblock, label %exitblock
+exitblock:
+ call void @foo()
+ ret i32 %div1
+}
+
diff --git a/test/NaCl/ARM/divrem-guards.ll b/test/NaCl/ARM/divrem-guards.ll
new file mode 100644
index 0000000000..574517f0cd
--- /dev/null
+++ b/test/NaCl/ARM/divrem-guards.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -insert-divide-check -S | FileCheck -check-prefix=OPT %s
+; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-branch -filetype=obj %s -o - \
+; RUN: | llvm-objdump -disassemble -triple armv7 -mattr=+nacl-trap - \
+; RUN: | FileCheck -check-prefix=ARM %s
+
+
+; Check for all four operators that need guards.
+define i32 @mysdiv(i32 %x, i32 %y) #0 {
+entry:
+ %div1 = sdiv i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: sdiv
+; OPT-NEXT: ret
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+; ARM: cmp r1, #0
+; ARM-NEXT: beq
+ ret i32 %div1
+; ARM: f0 de fe e7
+}
+
+define i32 @myudiv(i32 %x, i32 %y) #0 {
+entry:
+ %div1 = udiv i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: udiv
+; OPT-NEXT: ret
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+; ARM: cmp r1, #0
+; ARM-NEXT: beq
+ ret i32 %div1
+; ARM: f0 de fe e7
+}
+
+define i32 @mysrem(i32 %x, i32 %y) #0 {
+entry:
+ %rem1 = srem i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: srem
+; OPT-NEXT: ret
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+; ARM: cmp r1, #0
+; ARM-NEXT: beq
+ ret i32 %rem1
+; ARM: f0 de fe e7
+}
+
+define i32 @myurem(i32 %x, i32 %y) #0 {
+entry:
+ %rem1 = urem i32 %x, %y
+; OPT: %0 = icmp eq i32 %y, 0
+; OPT-NEXT: br i1 %0, label %divrem.by.zero, label %guarded.divrem
+; OPT: guarded.divrem:
+; OPT-NEXT: urem
+; OPT-NEXT: ret
+; OPT: divrem.by.zero:
+; OPT-NEXT: call void @llvm.trap()
+; OPT-NEXT: unreachable
+; ARM: cmp r1, #0
+; ARM-NEXT: beq
+ ret i32 %rem1
+; ARM: f0 de fe e7
+}
+
+; Divides by non-zero constants should not be guarded.
+define i32 @mysdiv_const(i32 %x) #0 {
+entry:
+ %div1 = sdiv i32 %x, 10
+; OPT-NOT: icmp
+; OPT-NOT: br
+; OPT-NOT: guarded.divrem:
+; OPT-NOT: divrem.by.zero:
+; OPT-NOT: call void @llvm.trap()
+; OPT-NOT: unreachable
+; ARM-NOT: cmp r1, #0
+; ARM-NOT: f0 de fe e7
+ ret i32 %div1
+}
+
+; Divides by explicit zero should be prefixed by a trap.
+define i32 @mysdiv_zero(i32 %x) #0 { +entry: + %div1 = sdiv i32 %x, 0 +; OPT-NOT: guarded.divrem: +; OPT-NOT: divrem.by.zero: +; OPT: call void @llvm.trap() +; OPT-NEXT: sdiv +; ARM-NOT: cmp r1, #0 +; ARM: f0 de fe e7 + ret i32 %div1 +} + +attributes #0 = { nounwind } diff --git a/test/NaCl/ARM/intrinsics-bitmanip.ll b/test/NaCl/ARM/intrinsics-bitmanip.ll new file mode 100644 index 0000000000..2bbe0c6830 --- /dev/null +++ b/test/NaCl/ARM/intrinsics-bitmanip.ll @@ -0,0 +1,65 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -filetype=asm %s -o - | FileCheck %s +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -O0 -filetype=asm %s -o - | FileCheck %s + +; Test that various bit manipulation intrinsics are supported by the +; NaCl ARM backend. + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +; CHECK: test_bswap_16 +; CHECK: rev [[REG:r[0-9]+]], {{r[0-9]+}} +; CHECK-NEXT: lsr {{.*}}[[REG]], {{.*}}[[REG]], #16 +define i16 @test_bswap_16(i16 %a) { + %b = call i16 @llvm.bswap.i16(i16 %a) + ret i16 %b +} + +; CHECK: test_bswap_const_16 +; 0xcdab +; CHECK: movw r0, #52651 +define i16 @test_bswap_const_16() { + ; 0xabcd + %a = call i16 @llvm.bswap.i16(i16 43981) + ret i16 %a +} + +; CHECK: test_bswap_32 +; CHECK: rev [[REG:r[0-9]+]], {{r[0-9]+}} +define i32 @test_bswap_32(i32 %a) { + %b = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %b +} + +; CHECK: test_bswap_const_32 +; 0x01ef cdab +; CHECK: movw r0, #52651 +; CHECK: movt r0, #495 +define i32 @test_bswap_const_32() { + ; 0xabcdef01 + %a = call i32 @llvm.bswap.i32(i32 2882400001) + ret i32 %a +} + +; CHECK: test_bswap_64 +; O0 spills when swapping r0/r1, whereas O2 uses r2 as a temporary. +; CHECK: rev {{r[0-9]+}}, {{r[0-9]+}} +; CHECK: rev r1, {{r[0-9]+}} +; CHECK: mov r0, {{r[0-9]+}} +define i64 @test_bswap_64(i64 %a) { + %b = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %b +} + +; CHECK: test_bswap_const_64 +; 0x8967 4523 01ef cdab +; Just checking movw, since O0 and O2 have different schedules for the +; movw/movt of r0/r1. 
+; CHECK: movw r0, #52651 +; CHECK: movw r1, #17699 +define i64 @test_bswap_const_64() { + ; 0xabcdef01 23456789 + %a = call i64 @llvm.bswap.i64(i64 12379813738877118345) + ret i64 %a +} diff --git a/test/NaCl/ARM/lit.local.cfg b/test/NaCl/ARM/lit.local.cfg new file mode 100644 index 0000000000..bac2ffab31 --- /dev/null +++ b/test/NaCl/ARM/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll'] + +targets = set(config.root.targets_to_build.split()) +if not 'ARM' in targets: + config.unsupported = True + diff --git a/test/NaCl/ARM/nacl-read-tp-intrinsic.ll b/test/NaCl/ARM/nacl-read-tp-intrinsic.ll new file mode 100644 index 0000000000..f88f665f36 --- /dev/null +++ b/test/NaCl/ARM/nacl-read-tp-intrinsic.ll @@ -0,0 +1,20 @@ + +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=asm %s -o - \ +; RUN: | FileCheck -check-prefix=ARM %s + +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=asm -mtls-use-call %s -o - \ +; RUN: | FileCheck -check-prefix=ARM_IRT %s + + +declare i8* @llvm.nacl.read.tp() + +define i8* @get_thread_pointer() { + %tp = call i8* @llvm.nacl.read.tp() + ret i8* %tp +} + +; ARM: get_thread_pointer: +; ARM: ldr r0, [r9] + +; ARM_IRT: get_thread_pointer: +; ARM_IRT: bl __aeabi_read_tp diff --git a/test/NaCl/ARM/nacl-setlongjmp-intrinsics.ll b/test/NaCl/ARM/nacl-setlongjmp-intrinsics.ll new file mode 100644 index 0000000000..9d24037560 --- /dev/null +++ b/test/NaCl/ARM/nacl-setlongjmp-intrinsics.ll @@ -0,0 +1,18 @@ +; RUN: pnacl-llc -mtriple=arm-unknown-nacl -filetype=asm %s -o - \ +; RUN: | FileCheck %s --check-prefix=ARM +; Test that @llvm.nacl.{set|long}jmp intrinsics calls get translated to library +; calls as expected. + +declare i32 @llvm.nacl.setjmp(i8*) +declare void @llvm.nacl.longjmp(i8*, i32) + +define void @foo(i8* %arg) { + %num = call i32 @llvm.nacl.setjmp(i8* %arg) +; ARM: bl setjmp + + call void @llvm.nacl.longjmp(i8* %arg, i32 %num) +; ARM: bl longjmp + + ret void +} + diff --git a/test/NaCl/ARM/negative-addend.ll b/test/NaCl/ARM/negative-addend.ll new file mode 100644 index 0000000000..41d7eba43c --- /dev/null +++ b/test/NaCl/ARM/negative-addend.ll @@ -0,0 +1,19 @@ +; RUN: pnacl-llc -mtriple=arm-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -r - | FileCheck %s -check-prefix=ARM + +; Check that "add" works for negative values when used as a +; ConstantExpr in a global variable initializer. +; See: https://code.google.com/p/nativeclient/issues/detail?id=3548 + + +; @spacer and @var end up in the BSS section. +; @spacer is at offset 0. @var is at offset 4096 = 0x1000. 
+ +@spacer = internal global [4096 x i8] zeroinitializer +@var = internal global i32 zeroinitializer + +@negative_offset = internal global i32 add + (i32 ptrtoint (i32* @var to i32), i32 -8) + +; ARM: RELOCATION RECORDS FOR [.data.rel.local]: +; ARM-NEXT: 0 R_ARM_ABS32 .bss diff --git a/test/NaCl/ARM/neon-vld1-sandboxing.ll b/test/NaCl/ARM/neon-vld1-sandboxing.ll new file mode 100644 index 0000000000..f5b94d02d6 --- /dev/null +++ b/test/NaCl/ARM/neon-vld1-sandboxing.ll @@ -0,0 +1,113 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define <8 x i8> @vld1i8(i8* %A) nounwind { + %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+}}}, [r0:64] + ret <8 x i8> %tmp1 +} + +define <4 x i16> @vld1i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r0] + ret <4 x i16> %tmp1 +} + +define <2 x i32> @vld1i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+}}}, [r0] + ret <2 x i32> %tmp1 +} + +; Insert useless arguments here just for the sake of moving +; %A further down the rN chain (testing how sandboxing detects +; the correct register and not just the default r0) +define <1 x i64> @vld1i64(i32 %foo, i32 %bar, i32 %baz, + i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i8* %tmp0, i32 1) +; CHECK: bic r3, r3, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}}, [r3] + ret <1 x i64> %tmp1 +} + +define <16 x i8> @vld1Qi8(i8* %A) nounwind { + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:64] + ret <16 x i8> %tmp1 +} + +define <8 x i16> @vld1Qi16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] + ret <8 x i16> %tmp1 +} + +define <4 x i32> @vld1Qi32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <4 x i32> %tmp1 +} + +define <2 x i64> @vld1Qi64(i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %tmp0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <2 x i64> %tmp1 +} + +declare <8 x i8> @llvm.arm.neon.vld1.v8i8(i8*, i32) nounwind readonly +declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*, i32) nounwind readonly +declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*, i32) nounwind readonly +declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*, i32) nounwind readonly +declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*, i32) nounwind readonly + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*, i32) nounwind readonly +declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly +declare 
<2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly + +define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { + %A = load i8** %ptr + %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1:64]! + %tmp2 = getelementptr i8* %A, i32 16 + store i8* %tmp2, i8** %ptr + ret <16 x i8> %tmp1 +} + +define <4 x i16> @vld1i16_update(i16** %ptr) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+}}}, [r1]! + %tmp2 = getelementptr i16* %A, i32 4 + store i16* %tmp2, i16** %ptr + ret <4 x i16> %tmp1 +} + +define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind { + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+}}}, [r2], r1 + %tmp2 = getelementptr i32* %A, i32 %inc + store i32* %tmp2, i32** %ptr + ret <2 x i32> %tmp1 +} + diff --git a/test/NaCl/ARM/neon-vld2-sandboxing.ll b/test/NaCl/ARM/neon-vld2-sandboxing.ll new file mode 100644 index 0000000000..0d96f0d9c7 --- /dev/null +++ b/test/NaCl/ARM/neon-vld2-sandboxing.ll @@ -0,0 +1,116 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-load -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x2_t = type { <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly + +define <8 x i8> @vld2i8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1 + %tmp4 = add <8 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64] + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld2i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x2_t 
%tmp1, 1 + %tmp4 = add <4 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128] + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld2i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1 + %tmp4 = add <2 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + ret <2 x i32> %tmp4 +} + +define <16 x i8> @vld2Qi8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64] + ret <16 x i8> %tmp4 +} + +define <8 x i16> @vld2Qi16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1 + %tmp4 = add <8 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128] + ret <8 x i16> %tmp4 +} + +define <4 x i32> @vld2Qi32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64) + %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1 + %tmp4 = add <4 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:256] + ret <4 x i32> %tmp4 +} + +define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind { + %A = load i8** %ptr + %tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16) + %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r2, r2, #3221225472 +; CHECK: vld2.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r2:128], r1 + %tmp5 = getelementptr i8* %A, i32 %inc + store i8* %tmp5, i8** %ptr + ret <16 x i8> %tmp4 +} + +define <2 x float> @vld2f_update(float** %ptr) nounwind { + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK: vld2.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
+ %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1 + %tmp4 = fadd <2 x float> %tmp2, %tmp3 + %tmp5 = getelementptr float* %A, i32 4 + store float* %tmp5, float** %ptr + ret <2 x float> %tmp4 +} + diff --git a/test/NaCl/ARM/neon-vld3-sandboxing.ll b/test/NaCl/ARM/neon-vld3-sandboxing.ll new file mode 100644 index 0000000000..2da916b28e --- /dev/null +++ b/test/NaCl/ARM/neon-vld3-sandboxing.ll @@ -0,0 +1,106 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32(i8*, i32) nounwind readonly + +define <8 x i8> @vld3i8(i32 %foobar, i32 %ba, i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A, i32 32) + %tmp2 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2:64] + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld3i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld3i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32(i8* %tmp0, i32 1) + %tmp2 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0] + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vld3i64(i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = 
call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:64] + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld3Qi8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8(i8* %A, i32 32) + %tmp2 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:64]! + ret <16 x i8> %tmp4 +} + +define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2], r1 + %tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 + %tmp5 = getelementptr i16* %A, i32 %inc + store i16* %tmp5, i16** %ptr + ret <4 x i16> %tmp4 +} + +define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind { + %A = load i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld3.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r1]! + %tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2 + %tmp4 = add <4 x i32> %tmp2, %tmp3 + %tmp5 = getelementptr i32* %A, i32 12 + store i32* %tmp5, i32** %ptr + ret <4 x i32> %tmp4 +} + diff --git a/test/NaCl/ARM/neon-vld4-sandboxing.ll b/test/NaCl/ARM/neon-vld4-sandboxing.ll new file mode 100644 index 0000000000..d888a99165 --- /dev/null +++ b/test/NaCl/ARM/neon-vld4-sandboxing.ll @@ -0,0 +1,107 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_int64x1x4_t = type { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } + +%struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4.v2f32(i8*, i32) nounwind readonly +declare %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8*, i32) nounwind readonly + +declare %struct.__neon_int8x16x4_t 
@llvm.arm.neon.vld4.v16i8(i8*, i32) nounwind readonly +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8*, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4.v4i32(i8*, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4.v4f32(i8*, i32) nounwind readonly + +define <8 x i8> @vld4i8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 8) + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:64] + ret <8 x i8> %tmp4 +} + +define <4 x i16> @vld4i16(i16* %A) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4.v4i16(i8* %tmp0, i32 16) + %tmp2 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2 + %tmp4 = add <4 x i16> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] + ret <4 x i16> %tmp4 +} + +define <2 x i32> @vld4i32(i32* %A) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32(i8* %tmp0, i32 32) + %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2 + %tmp4 = add <2 x i32> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256] + ret <2 x i32> %tmp4 +} + +define <1 x i64> @vld4i64(i64* %A) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = call %struct.__neon_int64x1x4_t @llvm.arm.neon.vld4.v1i64(i8* %tmp0, i32 64) + %tmp2 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2 + %tmp4 = add <1 x i64> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256] + ret <1 x i64> %tmp4 +} + +define <16 x i8> @vld4Qi8(i8* %A) nounwind { + %tmp1 = call %struct.__neon_int8x16x4_t @llvm.arm.neon.vld4.v16i8(i8* %A, i32 64) + %tmp2 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2 + %tmp4 = add <16 x i8> %tmp2, %tmp3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256]! 
+; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256] + ret <16 x i8> %tmp4 +} + +define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind { + %A = load i8** %ptr + %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16) +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2:128], r1 + %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2 + %tmp4 = add <8 x i8> %tmp2, %tmp3 + %tmp5 = getelementptr i8* %A, i32 %inc + store i8* %tmp5, i8** %ptr + ret <8 x i8> %tmp4 +} + +define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r1:64]! + %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0 + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2 + %tmp4 = add <8 x i16> %tmp2, %tmp3 + %tmp5 = getelementptr i16* %A, i32 32 + store i16* %tmp5, i16** %ptr + ret <8 x i16> %tmp4 +} + diff --git a/test/NaCl/ARM/neon-vlddup-sandboxing.ll b/test/NaCl/ARM/neon-vlddup-sandboxing.ll new file mode 100644 index 0000000000..187efedbb0 --- /dev/null +++ b/test/NaCl/ARM/neon-vlddup-sandboxing.ll @@ -0,0 +1,151 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> } + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly + +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } + +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly + +define <8 x i8> @vld1dupi8(i32 %foo, i32 %bar, + i8* %A) nounwind { + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer +; CHECK: bic r2, r2, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}}, [r2] + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1dupi16(i16* %A) nounwind { + %tmp1 = load i16* %A, align 8 + %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0 + %tmp3 = 
shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+\[\]}}}, [r0:16] + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1dupi32(i32* %A) nounwind { + %tmp1 = load i32* %A, align 8 + %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0 + %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+\[\]}}}, [r0:32] + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vld1dupQi8(i8* %A) nounwind { + %tmp1 = load i8* %A, align 8 + %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0 + %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + ret <16 x i8> %tmp3 +} + +define <8 x i8> @vld2dupi8(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.8 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0 + %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1 + %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer + %tmp5 = add <8 x i8> %tmp2, %tmp4 + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2dupi16(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2dupi32(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0:64] + %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1 + %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp5 = add <2 x i32> %tmp2, %tmp4 + ret <2 x i32> %tmp5 +} + +define <4 x i16> @vld3dupi16(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0] + %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2 + %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp7 = add <4 x i16> %tmp2, %tmp4 + %tmp8 = add <4 x i16> 
%tmp7, %tmp6 + ret <4 x i16> %tmp8 +} + +define <2 x i32> @vld4dupi32(i8* %A) nounwind { + %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0:64] + %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0 + %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1 + %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2 + %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3 + %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer + %tmp9 = add <2 x i32> %tmp2, %tmp4 + %tmp10 = add <2 x i32> %tmp6, %tmp8 + %tmp11 = add <2 x i32> %tmp9, %tmp10 + ret <2 x i32> %tmp11 +} + +;Check for a post-increment updating load. +define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind { + %A = load i16** %ptr + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r1]! + %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2 + %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3 + %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp9 = add <4 x i16> %tmp2, %tmp4 + %tmp10 = add <4 x i16> %tmp6, %tmp8 + %tmp11 = add <4 x i16> %tmp9, %tmp10 + %tmp12 = getelementptr i16* %A, i32 4 + store i16* %tmp12, i16** %ptr + ret <4 x i16> %tmp11 +} diff --git a/test/NaCl/ARM/neon-vldlane-sandboxing.ll b/test/NaCl/ARM/neon-vldlane-sandboxing.ll new file mode 100644 index 0000000000..b7f1097237 --- /dev/null +++ b/test/NaCl/ARM/neon-vldlane-sandboxing.ll @@ -0,0 +1,319 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } + +%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare 
%struct.__neon_float32x2x2_t @llvm.arm.neon.vld2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly + +%struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } +%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } +%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } + +%struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } +%struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } +%struct.__neon_float32x4x4_t = type { <4 x float>, <4 x float>, <4 x float>, <4 x float> } + +declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly +declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x2x4_t @llvm.arm.neon.vld4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind readonly + +declare %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind readonly +declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind readonly + 
+define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = load i8* %A, align 8 + %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i8> %tmp3 +} + +define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %B + %tmp2 = load i16* %A, align 8 + %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0:16] + ret <4 x i16> %tmp3 +} + +define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %B + %tmp2 = load i32* %A, align 8 + %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0:32] + ret <2 x i32> %tmp3 +} + +define <16 x i8> @vld1laneQi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B + %tmp2 = load i8* %A, align 8 + %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 9 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.8 {{{d[0-9]+\[[0-9]\]}}}, [r0] + ret <16 x i8> %tmp3 +} + +define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp1 = load <8 x i16>* %B + %tmp2 = load i16* %A, align 8 + %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0:16] + ret <8 x i16> %tmp3 +} + +define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp1 = load <4 x i32>* %B + %tmp2 = load i32* %A, align 8 + %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0:32] + ret <4 x i32> %tmp3 +} + +define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1 + %tmp5 = add <8 x i8> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:16] + ret <8 x i8> %tmp5 +} + +define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1 + %tmp5 = add <4 x i16> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:32] + ret <4 x i16> %tmp5 +} + +define <2 x i32> @vld2lanei32(i32 %foo, i32 %bar, i32 %baz, + i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 +; CHECK: bic r3, r3, #3221225472 +; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r3] + ret <2 x i32> %tmp5 +} + +define <8 x i16> @vld2laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + 
%tmp2 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) + %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x2_t %tmp2, 1 + %tmp5 = add <8 x i16> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i16> %tmp5 +} + +define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) + %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1 + %tmp5 = add <4 x i32> %tmp3, %tmp4 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:64] + ret <4 x i32> %tmp5 +} + +define <8 x i8> @vld3lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp2, 2 + %tmp6 = add <8 x i8> %tmp3, %tmp4 + %tmp7 = add <8 x i8> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i8> %tmp7 +} + +define <4 x i16> @vld3lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp2, 2 + %tmp6 = add <4 x i16> %tmp3, %tmp4 + %tmp7 = add <4 x i16> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i16> %tmp7 +} + +define <2 x i32> @vld3lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x3_t %tmp2, 2 + %tmp6 = add <2 x i32> %tmp3, %tmp4 + %tmp7 = add <2 x i32> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <2 x i32> %tmp7 +} + +define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %tmp2, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: 
vld3.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <8 x i16> %tmp7 +} + +define <4 x i32> @vld3laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 3, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x3_t %tmp2, 2 + %tmp6 = add <4 x i32> %tmp3, %tmp4 + %tmp7 = add <4 x i32> %tmp5, %tmp6 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld3.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i32> %tmp7 +} + +define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int8x8x4_t %tmp2, 3 + %tmp7 = add <8 x i8> %tmp3, %tmp4 + %tmp8 = add <8 x i8> %tmp5, %tmp6 + %tmp9 = add <8 x i8> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:32] + ret <8 x i8> %tmp9 +} + +define <4 x i16> @vld4lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + %tmp2 = call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 4) + %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x4x4_t %tmp2, 3 + %tmp7 = add <4 x i16> %tmp3, %tmp4 + %tmp8 = add <4 x i16> %tmp5, %tmp6 + %tmp9 = add <4 x i16> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i16> %tmp9 +} + +define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 8) + %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x2x4_t %tmp2, 3 + %tmp7 = add <2 x i32> %tmp3, %tmp4 + %tmp8 = add <2 x i32> %tmp5, %tmp6 + %tmp9 = add <2 x i32> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:64] + ret <2 x i32> %tmp9 +} + +define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B + %tmp2 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1, i32 16) + %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 0 
+ %tmp4 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int16x8x4_t %tmp2, 3 + %tmp7 = add <8 x i16> %tmp3, %tmp4 + %tmp8 = add <8 x i16> %tmp5, %tmp6 + %tmp9 = add <8 x i16> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:64] + ret <8 x i16> %tmp9 +} + +define <4 x i32> @vld4laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B + %tmp2 = call %struct.__neon_int32x4x4_t @llvm.arm.neon.vld4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 1 + %tmp5 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 2 + %tmp6 = extractvalue %struct.__neon_int32x4x4_t %tmp2, 3 + %tmp7 = add <4 x i32> %tmp3, %tmp4 + %tmp8 = add <4 x i32> %tmp5, %tmp6 + %tmp9 = add <4 x i32> %tmp7, %tmp8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0] + ret <4 x i32> %tmp9 +} + diff --git a/test/NaCl/ARM/neon-vst1-sandboxing.ll b/test/NaCl/ARM/neon-vst1-sandboxing.ll new file mode 100644 index 0000000000..e496aa492a --- /dev/null +++ b/test/NaCl/ARM/neon-vst1-sandboxing.ll @@ -0,0 +1,127 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {{{d[0-9]+}}}, [r0:64] + ret void +} + +define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1f(float* %A, <2 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <1 x i64>* %B + call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.64 {{{d[0-9]+}}}, [r0] + ret void +} + +define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64] + ret void +} + +define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B +; 
CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0:128] + ret void +} + +define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst1Qf(float* %A, <4 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind { + %tmp0 = bitcast i64* %A to i8* + %tmp1 = load <2 x i64>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +;Check for a post-increment updating store. +define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind { +; CHECK: bic r1, r1, #3221225472 + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst1.32 {{{d[0-9]+}}}, [r1]! 
+ %tmp2 = getelementptr float* %A, i32 2 + store float* %tmp2, float** %ptr + ret void +} + +declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind + diff --git a/test/NaCl/ARM/neon-vst2-sandboxing.ll b/test/NaCl/ARM/neon-vst2-sandboxing.ll new file mode 100644 index 0000000000..cfb1a11b5c --- /dev/null +++ b/test/NaCl/ARM/neon-vst2-sandboxing.ll @@ -0,0 +1,104 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+}}}, [r0:64] + ret void +} + +define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+}}}, [r0:128] + ret void +} + +define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst2.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst2f(float* %A, <2 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <2 x float>* %B + call void @llvm.arm.neon.vst2.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0:64] + ret void +} + +define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0:128] + ret void +} + +define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64) +; CHECK: bic r0, r0, #3221225472 +; 
CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0:256] + ret void +} + +define void @vst2Qf(float* %A, <4 x float>* %B) nounwind { + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst2.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0] + ret void +} + +;Check for a post-increment updating store with register increment. +define void @vst2i8_update(i8** %ptr, <8 x i8>* %B, i32 %inc) nounwind { +; CHECK: bic r1, r1, #3221225472 + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 4) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst2.8 {{{d[0-9]+, d[0-9]+}}}, [r1], r2 + %tmp2 = getelementptr i8* %A, i32 %inc + store i8* %tmp2, i8** %ptr + ret void +} + +declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.v1i64(i8*, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst2.v16i8(i8*, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.v8i16(i8*, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4i32(i8*, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/NaCl/ARM/neon-vst3-sandboxing.ll b/test/NaCl/ARM/neon-vst3-sandboxing.ll new file mode 100644 index 0000000000..178387c806 --- /dev/null +++ b/test/NaCl/ARM/neon-vst3-sandboxing.ll @@ -0,0 +1,50 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64] + ret void +} + +define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + ret void +} + +define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0] + ret void +} + +;Check for a post-increment updating store. 
+define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind { + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst3.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! + %tmp2 = getelementptr i16* %A, i32 24 + store i16* %tmp2, i16** %ptr + ret void +} + +declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind + +declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind diff --git a/test/NaCl/ARM/neon-vst4-sandboxing.ll b/test/NaCl/ARM/neon-vst4-sandboxing.ll new file mode 100644 index 0000000000..685de398dc --- /dev/null +++ b/test/NaCl/ARM/neon-vst4-sandboxing.ll @@ -0,0 +1,55 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64] + ret void +} + +define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128] + ret void +} + +define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:256] + ret void +} + +;Check for a post-increment updating store. +define void @vst4Qf_update(float** %ptr, <4 x float>* %B) nounwind { + %A = load float** %ptr + %tmp0 = bitcast float* %A to i8* + %tmp1 = load <4 x float>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst4.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
+ %tmp2 = getelementptr float* %A, i32 16 + store float* %tmp2, float** %ptr + ret void +} + +declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32) nounwind +declare void @llvm.arm.neon.vst4.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind + +declare void @llvm.arm.neon.vst4.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst4.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst4.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32) nounwind + diff --git a/test/NaCl/ARM/neon-vstlane-sandboxing.ll b/test/NaCl/ARM/neon-vstlane-sandboxing.ll new file mode 100644 index 0000000000..d136659fea --- /dev/null +++ b/test/NaCl/ARM/neon-vstlane-sandboxing.ll @@ -0,0 +1,209 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -mattr=+neon -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[3]}, [r0] + ret void +} + +define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp1 = load <4 x i16>* %B + %tmp2 = extractelement <4 x i16> %tmp1, i32 2 + store i16 %tmp2, i16* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[2]}, [r0:16] + ret void +} + +define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp1 = load <2 x i32>* %B + %tmp2 = extractelement <2 x i32> %tmp1, i32 1 + store i32 %tmp2, i32* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.32 {d{{[0-9]+}}[1]}, [r0:32] + ret void +} + +define void @vst1laneQi8(i8* %A, <16 x i8>* %B) nounwind { + %tmp1 = load <16 x i8>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + %tmp2 = extractelement <16 x i8> %tmp1, i32 9 + store i8 %tmp2, i8* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.8 {d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + %tmp2 = extractelement <8 x i16> %tmp1, i32 5 + store i16 %tmp2, i16* %A, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst1.16 {d{{[0-9]+}}[1]}, [r0:16] + ret void +} + +define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:16] + ret void +} + +define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], 
d{{[0-9]+}}[1]}, [r0:32] + ret void +} + +define void @vst2lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst2lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst2laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst2lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0:64] + ret void +} + +define void @vst3lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst3lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst3lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst3lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst3lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst3lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst3.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind { + %tmp1 = load <8 x i8>* %B + call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:32] + ret void +} + +define void @vst4lanei16(i16* %A, <4 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst4lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] + ret void +} + +define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>* %B + call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:128] + ret void +} + +define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind { + %tmp0 = bitcast i16* %A 
to i8* + %tmp1 = load <8 x i16>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0:64] + ret void +} + +define void @vst4laneQi32(i32* %A, <4 x i32>* %B) nounwind { + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <4 x i32>* %B +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] + call void @llvm.arm.neon.vst4lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1) +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vst4.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0] + ret void +} + +declare void @llvm.arm.neon.vst2lane.v8i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i16(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2i32(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v2f32(i8*, <2 x float>, <2 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst2lane.v4f32(i8*, <4 x float>, <4 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst3lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind +declare void @llvm.arm.neon.vst4lane.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind + +;Check for a post-increment updating store with register increment. 
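+; (All of the sandboxing checks in this file follow the same pattern: the
+; constant 3221225472 is 0xC0000000, so the inserted "bic" clears the top
+; two address bits and keeps the store address inside the NaCl data
+; sandbox before the NEON store executes.)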
+define void @vst2lanei16_update(i16** %ptr, <4 x i16>* %B, i32 %inc) nounwind { +; CHECK: bic r1, r1, #3221225472 + %A = load i16** %ptr + %tmp0 = bitcast i16* %A to i8* + %tmp1 = load <4 x i16>* %B + call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2) +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r1], r2 + %tmp2 = getelementptr i16* %A, i32 %inc + store i16* %tmp2, i16** %ptr + ret void +} diff --git a/test/NaCl/ARM/simple-load-store_sandboxing1.ll b/test/NaCl/ARM/simple-load-store_sandboxing1.ll new file mode 100644 index 0000000000..ccf1a50d54 --- /dev/null +++ b/test/NaCl/ARM/simple-load-store_sandboxing1.ll @@ -0,0 +1,27 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-store -sfi-load -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @foo(i32* %input, i32* %output) nounwind { +entry: + %input.addr = alloca i32*, align 4 + %output.addr = alloca i32*, align 4 + store i32* %input, i32** %input.addr, align 4 + store i32* %output, i32** %output.addr, align 4 + %0 = load i32** %input.addr, align 4 + %1 = load i32* %0, align 4 + +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: ldr r0, [r0] + + %add = add nsw i32 %1, 4 + %2 = load i32** %output.addr, align 4 + store i32 %add, i32* %2, align 4 + +; CHECK: bic r1, r1, #3221225472 +; CHECK-NEXT: str r0, [r1] + + ret void +} + + + diff --git a/test/NaCl/ARM/sp-arithmetic-sandboxing1.ll b/test/NaCl/ARM/sp-arithmetic-sandboxing1.ll new file mode 100644 index 0000000000..15aff8d16d --- /dev/null +++ b/test/NaCl/ARM/sp-arithmetic-sandboxing1.ll @@ -0,0 +1,28 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-store -sfi-load -sfi-stack -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @foo(i32* %input, i32* %output) nounwind { +entry: + %input.addr = alloca i32*, align 4 + %output.addr = alloca i32*, align 4 + %temp = alloca i32, align 4 + +; CHECK: sub sp, sp +; CHECK-NEXT: bic sp, sp, #3221225472 + + store i32* %input, i32** %input.addr, align 4 + store i32* %output, i32** %output.addr, align 4 + %0 = load i32** %input.addr, align 4 + %arrayidx = getelementptr inbounds i32* %0, i32 1 + %1 = load i32* %arrayidx, align 4 + store i32 %1, i32* %temp, align 4 + %2 = load i32* %temp, align 4 + %3 = load i32** %output.addr, align 4 + %arrayidx1 = getelementptr inbounds i32* %3, i32 0 + store i32 %2, i32* %arrayidx1, align 4 + +; CHECK: add sp, sp +; CHECK-NEXT: bic sp, sp, #3221225472 + + ret void +} diff --git a/test/NaCl/ARM/stack-change-sandboxing.ll b/test/NaCl/ARM/stack-change-sandboxing.ll new file mode 100644 index 0000000000..ca6dc2f653 --- /dev/null +++ b/test/NaCl/ARM/stack-change-sandboxing.ll @@ -0,0 +1,34 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-stack -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define i32 @foo(i32 %aa, i32 %bb) nounwind { +entry: + +; CHECK: sub sp, sp, #16 +; CHECK-NEXT: bic sp, sp, #3221225472 + + %aa.addr = alloca i32, align 4 + %bb.addr = alloca i32, align 4 + %cc = alloca i32, align 4 + %dd = alloca i32, align 4 + store i32 %aa, i32* %aa.addr, align 4 + store i32 %bb, i32* %bb.addr, align 4 + %0 = load i32* %aa.addr, align 4 + %1 = load i32* %bb.addr, align 4 + %mul = mul nsw i32 %0, %1 + store i32 %mul, i32* %cc, align 4 + %2 = load i32* %aa.addr, align 4 + %mul1 = mul nsw i32 %2, 17 + %3 = load i32* %cc, align 4 + %sub = sub nsw i32 %mul1, %3 + 
store i32 %sub, i32* %dd, align 4 + %4 = load i32* %dd, align 4 + ret i32 %4 + +; The nop here is to prevent add/bic to straddle a bundle boundary +; CHECK: nop +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: bic sp, sp, #3221225472 + +} + diff --git a/test/NaCl/ARM/vstr-sandboxing1.ll b/test/NaCl/ARM/vstr-sandboxing1.ll new file mode 100644 index 0000000000..ef9f98397f --- /dev/null +++ b/test/NaCl/ARM/vstr-sandboxing1.ll @@ -0,0 +1,13 @@ +; RUN: pnacl-llc -mtriple=armv7-unknown-nacl -sfi-store -filetype=obj %s -o - \ +; RUN: | llvm-objdump -disassemble -triple armv7 - | FileCheck %s + +define void @test_vstr_sandbox(<8 x i8>* %ptr) nounwind { + %1 = insertelement <8 x i8> undef, i8 -128, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + store <8 x i8> %2, <8 x i8>* %ptr, align 8 +; CHECK: bic r0, r0, #3221225472 +; CHECK-NEXT: vstr {{[0-9a-z]+}}, [r0] + + ret void +} + diff --git a/test/NaCl/Bitcode/alloca-operand.ll b/test/NaCl/Bitcode/alloca-operand.ll new file mode 100644 index 0000000000..49df2fc8b0 --- /dev/null +++ b/test/NaCl/Bitcode/alloca-operand.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump | FileCheck %s + +; Test that alloca's size operand is represented with a relative value +; ID, the same as other instructions' operands. + +define external void @_start(i32 %arg) { +; CHECK: <FUNCTION_BLOCK +; CHECK: </CONSTANTS_BLOCK> + + %size = mul i32 %arg, 4 +; CHECK-NEXT: <INST_BINOP + alloca i8, i32 %size +; CHECK-NEXT: <INST_ALLOCA op0=1 + + ; Since the operand reference is a relative ID, references to %size + ; go up by 1 with each instruction. + alloca i8, i32 %size +; CHECK-NEXT: <INST_ALLOCA op0=2 + alloca i8, i32 %size +; CHECK-NEXT: <INST_ALLOCA op0=3 + + ; Reference to a Constant operand. + alloca i8, i32 256 +; CHECK-NEXT: <INST_ALLOCA op0=5 + + ret void +; CHECK-NEXT: <INST_RET +} diff --git a/test/NaCl/Bitcode/bcanalyzer-width.ll b/test/NaCl/Bitcode/bcanalyzer-width.ll new file mode 100644 index 0000000000..57899120ef --- /dev/null +++ b/test/NaCl/Bitcode/bcanalyzer-width.ll @@ -0,0 +1,21 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump \ +; RUN: | FileCheck %s -check-prefix=BC +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump -operands-per-line=2 \ +; RUN: | FileCheck %s -check-prefix=BC2 +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump -operands-per-line=8 \ +; RUN: | FileCheck %s -check-prefix=BC8 + +; Test that the command-line option -operands-per-line works as expected. + +@bytes = internal global [10 x i8] c"abcdefghij" + +; BC: <DATA abbrevid=7 op0=97 op1=98 op2=99 op3=100 op4=101 op5=102 op6=103 op7=104 op8=105 op9=106/> + +; BC2: <DATA abbrevid=7 op0=97 op1=98 +; BC2: op2=99 op3=100 +; BC2: op4=101 op5=102 +; BC2: op6=103 op7=104 +; BC2: op8=105 op9=106/> + +; BC8: <DATA abbrevid=7 op0=97 op1=98 op2=99 op3=100 op4=101 op5=102 op6=103 op7=104 +; BC8: op8=105 op9=106/> diff --git a/test/NaCl/Bitcode/forward-ref-decl.ll b/test/NaCl/Bitcode/forward-ref-decl.ll new file mode 100644 index 0000000000..2aa344d6ac --- /dev/null +++ b/test/NaCl/Bitcode/forward-ref-decl.ll @@ -0,0 +1,58 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump | FileCheck %s + +; Test that FORWARDTYPEREF declarations are emitted in the correct +; places. These are emitted for forward value references inside +; functions. 
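+; (Here a "forward reference" is an operand, such as %forward1 below, whose
+; defining instruction appears later in the function; the bitcode writer
+; emits a FORWARDTYPEREF record giving the value's type before its first
+; use so the reader can type the operand.)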
+ +define external void @_start(i32 %arg) { +; CHECK: <FUNCTION_BLOCK + + br label %bb1 +; CHECK: <INST_BR + +bb2: + ; This instruction contains two forward references, because %x and + ; %y are defined later in the function. + add i32 %forward1, %forward2 +; CHECK-NEXT: <FORWARDTYPEREF abbrevid= +; CHECK-NEXT: <FORWARDTYPEREF abbrevid= +; CHECK-NEXT: <INST_BINOP abbrevid= + + ; The FORWARDTYPEREF declaration should only be emitted once per + ; value, so the following references will not emit more of them. + add i32 %forward1, %forward2 +; CHECK-NEXT: <INST_BINOP abbrevid= + + ; Test another case of a forward reference. + call void @_start(i32 %forward3) +; CHECK-NEXT: <FORWARDTYPEREF abbrevid= +; CHECK-NEXT: <INST_CALL + + ; Test that FORWARDTYPEREF is generated for phi nodes (since phi + ; node operands are a special case in the writer). + br label %bb3 +bb3: + phi i32 [ %forward4, %bb2 ] +; CHECK-NEXT: <INST_BR +; CHECK-NEXT: <FORWARDTYPEREF abbrevid= +; CHECK-NEXT: <INST_PHI + + ; Test that FORWARDTYPEREF is generated for switch instructions + ; (since switch condition operands are a special case in the + ; writer). + switch i32 %forward5, label %bb4 [i32 0, label %bb4] +bb4: +; CHECK-NEXT: <FORWARDTYPEREF abbrevid= +; CHECK-NEXT: <INST_SWITCH + + ret void +; CHECK-NEXT: <INST_RET + +bb1: + %forward1 = add i32 %arg, 100 + %forward2 = add i32 %arg, 200 + %forward3 = add i32 %arg, 300 + %forward4 = add i32 %arg, 400 + %forward5 = add i32 %arg, 500 + br label %bb2 +} diff --git a/test/NaCl/Bitcode/globalvars.ll b/test/NaCl/Bitcode/globalvars.ll new file mode 100644 index 0000000000..fd1f8eead2 --- /dev/null +++ b/test/NaCl/Bitcode/globalvars.ll @@ -0,0 +1,106 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw | llvm-dis - | FileCheck %s +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump \ +; RUN: | FileCheck %s -check-prefix=BC + +; Test that we generate appropriate bitcode values for global variables. + +; Make sure that no struct/array types are generated by the global variables. +; BC: <TYPE_BLOCK_ID +; BC-NEXT: <NUMENTRY +; BC-NEXT: <VOID/> +; BC-NEXT: <FUNCTION +; BC-NEXT: <POINTER +; BC-NEXT: </TYPE_BLOCK_ID> + +; Make sure that the function declaration for function func (below) +; appears before the global variables block. +; BC: <FUNCTION op0=2 op1=0 op2=0 op3=0/> + +; Make sure we begin the globals block after function declarations. 
+; BC-NEXT: <GLOBALVAR_BLOCK +; BC-NEXT: <COUNT op0=15/> + +@bytes = internal global [7 x i8] c"abcdefg" +; CHECK: @bytes = internal global [7 x i8] c"abcdefg" +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <DATA abbrevid=7 op0=97 op1=98 op2=99 op3=100 op4=101 op5=102 op6=103/> + + +@ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +; CHECK: @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=8 op0=5/> + +@ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) +; CHECK: @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=8 op0=0/> + +@compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; CHECK: @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <COMPOUND abbrevid=5 op0=2/> +; BC-NEXT: <DATA abbrevid=7 op0=102 op1=111 op2=111/> +; BC-NEXT: <RELOC abbrevid=8 op0=0/> + +@ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32) +; CHECK: @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=8 op0=1/> + +@addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +; CHECK: @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=5 op1=1/> + +@addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) +; CHECK: @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=5 op1=4294967295/> + +@addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1) +; CHECK: @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=1 op1=1/> + +@addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7) +; CHECK: @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=1 op1=7/> + +@addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9) +; CHECK: @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=1 op1=9/> + +@addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +; CHECK: @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=4 op1=1/> + +@addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) +; CHECK: @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=0/> +; BC-NEXT: <RELOC abbrevid=9 op0=4 op1=4/> + +@ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8 +; CHECK: @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8 +; BC-NEXT: <VAR abbrevid=4 op0=4 
op1=0/> +; BC-NEXT: <RELOC abbrevid=8 op0=0/> + +@char = internal constant [1 x i8] c"0" +; CHECK: @char = internal constant [1 x i8] c"0" +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=1/> +; BC-NEXT: <DATA abbrevid=7 op0=48/> + +@short = internal constant [2 x i8] zeroinitializer +; CHECK: @short = internal constant [2 x i8] zeroinitializer +; BC-NEXT: <VAR abbrevid=4 op0=0 op1=1/> +; BC-NEXT: <ZEROFILL abbrevid=6 op0=2/> + +; BC-NEXT: </GLOBALVAR_BLOCK> + +define void @func() { + ret void +} + diff --git a/test/NaCl/Bitcode/implicit-datalayout.ll b/test/NaCl/Bitcode/implicit-datalayout.ll new file mode 100644 index 0000000000..5a957dffec --- /dev/null +++ b/test/NaCl/Bitcode/implicit-datalayout.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw - | llvm-dis - | FileCheck %s + +; The "datalayout" field is considered to be implicit in the pexe. It +; is not stored in the pexe; the reader adds it implicitly. +; +; The most important parts of the datalayout for PNaCl are the pointer +; size and the endianness ("e" for little endian). + +; CHECK: target datalayout = "e{{.*}}p:32:32:32{{.*}}" diff --git a/test/NaCl/Bitcode/lit.local.cfg b/test/NaCl/Bitcode/lit.local.cfg new file mode 100644 index 0000000000..19eebc0ac7 --- /dev/null +++ b/test/NaCl/Bitcode/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/NaCl/Bitcode/struct-types.ll b/test/NaCl/Bitcode/struct-types.ll new file mode 100644 index 0000000000..a36e6bb0ab --- /dev/null +++ b/test/NaCl/Bitcode/struct-types.ll @@ -0,0 +1,78 @@ +; Checks if llvm bitcode defines a struct type before the pointer type, +; even if the struct definintion appears after the pointer type, while +; pnacl bitcode moves the pointer before the struct. +; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=LLVM +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump | FileCheck %s -check-prefix=PNACL + +%typeB = type { i8, %typeA, i32, %typeA } +%typeA = type { i16 } + +define %typeB* @foo(%typeB* %a) { + ret %typeB* %a +} + +define %typeB* @bar(%typeB* %b) { + ret %typeB* %b +} + +define i16 @bam(i16 %a) { + ret i16 %a +} + +; Show the ordering llvm uses to order types, which is to expand subtypes +; (including accross pointers) before the type. Expands types for functions +; in order: @foo, @bar, @bam. +; LLVM: <TYPE_BLOCK_ID {{.*}}> +; i8 +; LLVM: <INTEGER op0=8/> +; i16 +; LLVM: <INTEGER op0=16/> +; %typeA = type { i16 } +; LLVM: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=65/> +; LLVM: <STRUCT_NAMED abbrevid=8 op0=0 op1=1/> +; i32 +; LLVM: <INTEGER op0=32/> +; %typeB = type { i8, %typeA, i32, %typeA } +; LLVM: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=66/> +; LLVM: <STRUCT_NAMED abbrevid=8 op0=0 op1=0 op2=2 op3=3 op4=2/> +; %typeB* +; LLVM: <POINTER abbrevid=4 op0=4 op1=0/> +; %typeB* (%typeB*) +; LLVM: <FUNCTION abbrevid=5 op0=0 op1=5 op2=5/> +; %typeB* (%typeB*)* +; LLVM: <POINTER abbrevid=4 op0=6 op1=0/> +; i16 (i16) +; LLVM: <FUNCTION abbrevid=5 op0=0 op1=1 op2=1/> +; i16 (i16)* +; LLVM: <POINTER abbrevid=4 op0=8 op1=0/> +; type of instruction "RET" +; LLVM: <VOID/> +; LLVM: </TYPE_BLOCK_ID> + +; Show the ordering pnacl-freeze uses to order types. 
+; PNACL: <TYPE_BLOCK_ID {{.*}}> +; %typeB* +; PNACL: <POINTER abbrevid=4 op0=8 op1=0/> +; i16 +; PNACL: <INTEGER op0=16/> +; type of instruction "RET" +; PNACL: <VOID/> +; %typeA = type { i16 } +; PNACL: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=65/> +; PNACL: <STRUCT_NAMED abbrevid=8 op0=0 op1=1/> +; %typeB* (%typeB*) +; PNACL: <FUNCTION abbrevid=5 op0=0 op1=0 op2=0/> +; %typeB* (%typeB*)* +; PNACL: <POINTER abbrevid=4 op0=4 op1=0/> +; i8 +; PNACL: <INTEGER op0=8/> +; i32 +; PNACL: <INTEGER op0=32/> +; %typeB = type { i8, %typeA, i32, %typeA } +; PNACL: <STRUCT_NAME abbrevid=7 op0=116 op1=121 op2=112 op3=101 op4=66/> +; PNACL: <STRUCT_NAMED abbrevid=8 op0=0 op1=6 op2=3 op3=7 op4=3/> +; i16 (i16) +; PNACL: <FUNCTION abbrevid=5 op0=0 op1=1 op2=1/> +; i16 (i16)* +; PNACL: <POINTER abbrevid=4 op0=9 op1=0/> +; PNACL: </TYPE_BLOCK_ID> diff --git a/test/NaCl/Localmods/lit.local.cfg b/test/NaCl/Localmods/lit.local.cfg new file mode 100644 index 0000000000..c6106e4746 --- /dev/null +++ b/test/NaCl/Localmods/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll'] diff --git a/test/NaCl/Localmods/test-globalopt-main.ll b/test/NaCl/Localmods/test-globalopt-main.ll new file mode 100644 index 0000000000..90d0e3193c --- /dev/null +++ b/test/NaCl/Localmods/test-globalopt-main.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -globalopt -S | FileCheck %s + +; Check that our LOCALMOD for the GlobalOpt optimization is working properly. +; The user code entry point is a function named main that has a single user: +; a call from _start. +; @globchar can be folded into an alloca inside @main, and the global can be +; deleted. + +@globchar = internal global i8* null, align 8 +; CHECK-NOT: @globchar = internal global + +define internal i32 @main(i32 %argc, i8** %argv) { + ; CHECK: @main(i32 +entry: + ; CHECK: %globchar = alloca i8* + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + %0 = load i8*** %argv.addr, align 8 + %arrayidx = getelementptr inbounds i8** %0, i64 0 + %1 = load i8** %arrayidx, align 8 + store i8* %1, i8** @globchar, align 8 + %2 = load i8** @globchar, align 8 + %arrayidx1 = getelementptr inbounds i8* %2, i64 1 + %3 = load i8* %arrayidx1, align 1 + call void @somefunc(i8 signext %3) + ret i32 0 +} + +define i32 @_start(i32 %argc, i8** %argv) { + %rv = call i32 @main(i32 %argc, i8** %argv) + ret i32 %rv +} + +declare void @somefunc(i8 signext) + diff --git a/test/NaCl/PNaClABI/abi-addrspace.ll b/test/NaCl/PNaClABI/abi-addrspace.ll new file mode 100644 index 0000000000..e574a726e4 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-addrspace.ll @@ -0,0 +1,16 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; This test checks that the "addrspace" pointer attribute is rejected +; by the PNaCl ABI verifier. The only allowed address space value is +; 0 (the default). 
+ +@var = addrspace(1) global [4 x i8] c"xxxx" +; CHECK: Variable var has addrspace attribute (disallowed) + +define void @func() { + inttoptr i32 0 to i32 addrspace(2)* +; CHECK: disallowed: bad result type: {{.*}} inttoptr {{.*}} addrspace + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-aliases.ll b/test/NaCl/PNaClABI/abi-aliases.ll new file mode 100644 index 0000000000..0a1bf9ab11 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-aliases.ll @@ -0,0 +1,7 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +@aliased_var = internal global [1 x i8] c"x" +; CHECK-NOT: disallowed + +@alias1 = alias [1 x i8]* @aliased_var +; CHECK: Variable alias1 is an alias (disallowed) diff --git a/test/NaCl/PNaClABI/abi-alignment.ll b/test/NaCl/PNaClABI/abi-alignment.ll new file mode 100644 index 0000000000..a31914e4be --- /dev/null +++ b/test/NaCl/PNaClABI/abi-alignment.ll @@ -0,0 +1,122 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; Test the "align" attributes that are allowed on load and store +; instructions. Note that "cmpxchg" and "atomicrmw" do not take +; "align" attributes, so are not tested here. + + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) + + +define internal void @allowed_cases(i32 %ptr, float %f, double %d) { + %ptr.i32 = inttoptr i32 %ptr to i32* + load i32* %ptr.i32, align 1 + store i32 123, i32* %ptr.i32, align 1 + + %ptr.float = inttoptr i32 %ptr to float* + load float* %ptr.float, align 1 + load float* %ptr.float, align 4 + store float %f, float* %ptr.float, align 1 + store float %f, float* %ptr.float, align 4 + + %ptr.double = inttoptr i32 %ptr to double* + load double* %ptr.double, align 1 + load double* %ptr.double, align 8 + store double %d, double* %ptr.double, align 1 + store double %d, double* %ptr.double, align 8 + + ; Stricter alignments are required for atomics. + load atomic i32* %ptr.i32 seq_cst, align 4 + store atomic i32 123, i32* %ptr.i32 seq_cst, align 4 + load atomic float* %ptr.float seq_cst, align 4 + store atomic float %f, float* %ptr.float seq_cst, align 4 + load atomic double* %ptr.double seq_cst, align 8 + store atomic double %d, double* %ptr.double seq_cst, align 8 + + ; memcpy() et el take an alignment parameter, which is allowed to be 1. + %ptr.p = inttoptr i32 %ptr to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr.p, i8 99, + i32 10, i32 1, i1 false) + + ret void +} +; CHECK-NOT: disallowed + + +define internal void @rejected_cases(i32 %ptr, float %f, double %d, i32 %align) { + %ptr.i32 = inttoptr i32 %ptr to i32* + load i32* %ptr.i32, align 4 + store i32 123, i32* %ptr.i32, align 4 +; CHECK: disallowed: bad alignment: {{.*}} load i32{{.*}} align 4 +; CHECK-NEXT: disallowed: bad alignment: store i32{{.*}} align 4 + + ; Unusual, not-very-useful alignments are rejected. 
+ %ptr.float = inttoptr i32 %ptr to float* + load float* %ptr.float, align 2 + load float* %ptr.float, align 8 + store float %f, float* %ptr.float, align 2 + store float %f, float* %ptr.float, align 8 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load float{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load float{{.*}} align 8 +; CHECK-NEXT: disallowed: bad alignment: store float{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: store float{{.*}} align 8 + + %ptr.double = inttoptr i32 %ptr to double* + load double* %ptr.double, align 2 + load double* %ptr.double, align 4 + store double %d, double* %ptr.double, align 2 + store double %d, double* %ptr.double, align 4 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load double{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load double{{.*}} align 4 +; CHECK-NEXT: disallowed: bad alignment: store double{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: store double{{.*}} align 4 + + ; Too-small alignments for atomics are rejected. + load atomic i32* %ptr.i32 seq_cst, align 2 + load atomic float* %ptr.float seq_cst, align 2 + load atomic double* %ptr.double seq_cst, align 4 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load atomic i32{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load atomic float{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load atomic double{{.*}} align 4 + + ; Too-large alignments for atomics are also rejected. + load atomic i32* %ptr.i32 seq_cst, align 8 + load atomic float* %ptr.float seq_cst, align 8 + load atomic double* %ptr.double seq_cst, align 16 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load atomic i32{{.*}} align 8 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load atomic float{{.*}} align 8 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load atomic double{{.*}} align 16 + + ; Non-pessimistic alignments for memcpy() et al are rejected. + %ptr.p = inttoptr i32 %ptr to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr.p, i8 99, + i32 10, i32 4, i1 false) +; CHECK-NEXT: bad alignment: call void @llvm.memcpy +; CHECK-NEXT: bad alignment: call void @llvm.memmove +; CHECK-NEXT: bad alignment: call void @llvm.memset + + ; Check that the verifier does not crash if the alignment argument + ; is not a constant. + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 %align, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 %align, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr.p, i8 99, + i32 10, i32 %align, i1 false) +; CHECK-NEXT: bad alignment: call void @llvm.memcpy +; CHECK-NEXT: bad alignment: call void @llvm.memmove +; CHECK-NEXT: bad alignment: call void @llvm.memset + + ret void +} +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-arithmetic-attributes.ll b/test/NaCl/PNaClABI/abi-arithmetic-attributes.ll new file mode 100644 index 0000000000..c223830ac5 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-arithmetic-attributes.ll @@ -0,0 +1,35 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; This tests that the arithmetic attributes "nuw" and "nsw" ("no +; unsigned wrap" and "no signed wrap") and "exact" are disallowed by +; the PNaCl ABI verifier. 
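+; (These flags only add extra undefined-behaviour guarantees for the
+; optimizer, so they are outside the stable PNaCl subset and the verifier
+; rejects instructions that still carry them.)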
+ +define internal void @allowed_cases() { + %add = add i32 1, 2 + %shl = shl i32 3, 4 + %udiv = udiv i32 4, 2 + %lshr = lshr i32 2, 1 + %ashr = ashr i32 2, 1 + ret void +} +; CHECK-NOT: disallowed + + +define internal void @rejected_cases() { + %add = add nsw i32 1, 2 +; CHECK: disallowed: has "nsw" attribute: %add + %shl1 = shl nuw i32 3, 4 +; CHECK-NEXT: disallowed: has "nuw" attribute: %shl1 + %sub = sub nsw nuw i32 5, 6 +; CHECK-NEXT: disallowed: has "nuw" attribute: %sub + + %lshr = lshr exact i32 2, 1 +; CHECK-NEXT: disallowed: has "exact" attribute: %lshr + %ashr = ashr exact i32 2, 1 +; CHECK-NEXT: disallowed: has "exact" attribute: %ashr + %udiv = udiv exact i32 4, 2 +; CHECK-NEXT: disallowed: has "exact" attribute: %udiv + + ret void +} +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-bad-intrinsic.ll b/test/NaCl/PNaClABI/abi-bad-intrinsic.ll new file mode 100644 index 0000000000..644394038d --- /dev/null +++ b/test/NaCl/PNaClABI/abi-bad-intrinsic.ll @@ -0,0 +1,10 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; This intrinsic is declared with the wrong type, using i32* arguments +; instead of i8*. Check that the ABI verifier rejects this. This +; must be tested in a separate .ll file from the correct intrinsic +; declarations. + +declare void @llvm.memcpy.p0i8.p0i8.i32(i32* %dest, i32* %src, + i32 %len, i32 %align, i1 %isvolatile) +; CHECK: Function llvm.memcpy.p0i8.p0i8.i32 is a disallowed LLVM intrinsic diff --git a/test/NaCl/PNaClABI/abi-blockaddress.ll b/test/NaCl/PNaClABI/abi-blockaddress.ll new file mode 100644 index 0000000000..c8434e2c1f --- /dev/null +++ b/test/NaCl/PNaClABI/abi-blockaddress.ll @@ -0,0 +1,11 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +define void @func_with_block() { + br label %some_block +some_block: + ret void +} +; CHECK-NOT: disallowed + +@blockaddr = global i8* blockaddress(@func_with_block, %some_block) +; CHECK: Global variable blockaddr has non-flattened initializer (disallowed): i8* blockaddress(@func_with_block, %some_block) diff --git a/test/NaCl/PNaClABI/abi-call-attributes.ll b/test/NaCl/PNaClABI/abi-call-attributes.ll new file mode 100644 index 0000000000..56c6a8c363 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-call-attributes.ll @@ -0,0 +1,20 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +define void @func(i32 %arg) { + ret void +} + +define void @calls() { + call void @func(i32 1) noreturn nounwind +; CHECK: disallowed: bad call attributes: call void @func(i32 1) # + + call void @func(i32 inreg 1) +; CHECK-NEXT: disallowed: bad call attributes: call void @func(i32 inreg 1) + + call fastcc void @func(i32 1) +; CHECK-NEXT: disallowed: bad calling convention: call fastcc void @func(i32 1) + + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-debug-info.ll b/test/NaCl/PNaClABI/abi-debug-info.ll new file mode 100644 index 0000000000..49b31bab91 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-debug-info.ll @@ -0,0 +1,37 @@ +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics=0 < %s | FileCheck %s +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics=0 \ +; RUN: -pnaclabi-allow-debug-metadata < %s | FileCheck %s --check-prefix=DBG +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics=1 < %s | \ +; RUN: FileCheck %s --check-prefix=DBG + + +; DBG-NOT: disallowed + + +declare void @llvm.dbg.declare(metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata) + +; CHECK: Function llvm.dbg.declare is a disallowed LLVM intrinsic +; CHECK: Function llvm.dbg.value is a disallowed LLVM intrinsic + 
+ +define internal void @debug_declare(i32 %val) { + ; We normally expect llvm.dbg.declare to be used on an alloca. + %var = alloca [4 x i8] + tail call void @llvm.dbg.declare(metadata !{[4 x i8]* %var}, metadata !{}) + tail call void @llvm.dbg.declare(metadata !{i32 %val}, metadata !{}) + ret void +} + +define internal void @debug_value(i32 %ptr_as_int, i32 %val) { + %ptr = inttoptr i32 %ptr_as_int to i8* + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 2, metadata !{}) + tail call void @llvm.dbg.value(metadata !{i32 %val}, i64 1, metadata !{}) + ret void +} + +; FileCheck gives an error if its input file is empty, so ensure that +; the output of pnacl-abicheck is non-empty by generating at least one +; error. +declare void @bad_func(ppc_fp128 %bad_arg) +; DBG: Function bad_func has disallowed type: void (ppc_fp128) diff --git a/test/NaCl/PNaClABI/abi-externals-whitelist.ll b/test/NaCl/PNaClABI/abi-externals-whitelist.ll new file mode 100644 index 0000000000..e0ec6e896b --- /dev/null +++ b/test/NaCl/PNaClABI/abi-externals-whitelist.ll @@ -0,0 +1,24 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; Make sure that external symbols are properly rejected or accepted + +define void @foo() { + ret void +} + +; CHECK: foo is not a valid external symbol (disallowed) + +define external void @main() { + ret void +} +; CHECK: main is not a valid external symbol (disallowed) + +define external void @_start() { + ret void +} +; _start is whitelisted +; CHECK-NOT: _start is not a valid external symbol (disallowed) + +; Intrinsics can be external too +declare void @llvm.trap() + diff --git a/test/NaCl/PNaClABI/abi-flattened-globals.ll b/test/NaCl/PNaClABI/abi-flattened-globals.ll new file mode 100644 index 0000000000..38f9ca8109 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-flattened-globals.ll @@ -0,0 +1,69 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + + +; Allowed cases + +@bytes = internal global [7 x i8] c"abcdefg" + +@ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +@ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) + +@compound = internal global <{ [3 x i8], i32 }> + <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> + +@ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32) + +@addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +@addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) + +@addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1) +@addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7) +@addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9) + +@addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +@addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) + +; CHECK-NOT: disallowed + + +; Disallowed cases + +@bad_external = external global [1 x i8] +; CHECK: Global variable bad_external has no initializer (disallowed) + +@bad_int = internal global i32 0 +; CHECK: Global variable bad_int has non-flattened initializer (disallowed): i32 0 + +@bad_size = internal global i64 ptrtoint ([7 x i8]* @bytes to i64) +; CHECK: Global variable bad_size has non-flattened initializer + +; "null" is not allowed. 
+@bad_ptr = internal global i8* null +; CHECK: Global variable bad_ptr has non-flattened initializer + +@bad_ptr2 = internal global i64 ptrtoint (i8* null to i64) +; CHECK: Global variable bad_ptr2 has non-flattened initializer + +@bad_sub = internal global i32 sub (i32 ptrtoint (i32* @ptr to i32), i32 1) +; CHECK: Global variable bad_sub has non-flattened initializer + +; i16 not allowed here. +@bad_compound = internal global <{ i32, i16 }> + <{ i32 ptrtoint (void ()* @func to i32), i16 0 }> +; CHECK: Global variable bad_compound has non-flattened initializer + +; The struct type must be packed. +@non_packed_struct = internal global { [3 x i8], i32 } + { [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) } +; CHECK: Global variable non_packed_struct has non-flattened initializer + +; The struct type must be anonymous. +%struct = type <{ [3 x i8], i32 }> +@named_struct = internal global %struct + <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; CHECK: Global variable named_struct has non-flattened initializer + + +define internal void @func() { + ret void +} diff --git a/test/NaCl/PNaClABI/abi-i1-operations.ll b/test/NaCl/PNaClABI/abi-i1-operations.ll new file mode 100644 index 0000000000..4c63683bb8 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-i1-operations.ll @@ -0,0 +1,66 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; Most arithmetic operations are not very useful on i1, so use of i1 +; is restricted to a subset of operations. + + +; i1 is allowed on these bitwise operations because: +; * These operations never overflow. +; * They do get generated in practice for combining conditions. +define internal void @allowed_cases() { + %and = and i1 0, 0 + %or = or i1 0, 0 + %xor = xor i1 0, 0 + ret void +} +; CHECK-NOT: disallowed + + +define internal void @rejected_cases(i32 %ptr) { + ; Loads and stores of i1 are disallowed. This is done by rejecting + ; i1* as a pointer type. + %ptr.p = inttoptr i32 %ptr to i1* +; CHECK: disallowed: bad result type: %ptr.p = inttoptr + load i1* %ptr.p, align 1 +; CHECK-NEXT: disallowed: bad pointer: {{.*}} load i1* + + ; i1 arithmetic is of dubious usefulness, so it is rejected. + add i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} add + sub i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} sub + mul i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} mul + udiv i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} udiv + sdiv i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} sdiv + urem i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} urem + srem i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} srem + shl i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} shl + lshr i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} lshr + ashr i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} ashr + + ; The same applies to i1 comparisons. + icmp eq i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} icmp eq + icmp ult i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} icmp ult + + ; There should be no implicit zero-extension in alloca. + alloca i8, i1 1 +; CHECK-NEXT: disallowed: alloca array size is not i32 + + ; Switch on i1 is not useful. "br" should be used instead. 
+ switch i1 0, label %next [i1 0, label %next] +; CHECK-NEXT: disallowed: switch on i1 +next: + + ret void +} +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-metadata.ll b/test/NaCl/PNaClABI/abi-metadata.ll new file mode 100644 index 0000000000..751a3d3673 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-metadata.ll @@ -0,0 +1,19 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; RUN: pnacl-abicheck -pnaclabi-allow-debug-metadata < %s | FileCheck %s --check-prefix=DEBUG + + +; Metadata is not part of the PNaCl's stable ABI, so normally the ABI +; checker rejects metadata entirely. However, for debugging support, +; pre-finalized pexes may contain metadata. When checking a +; pre-finalized pexe, the ABI checker does not check the types in the +; metadata. + +; DEBUG-NOT: Named metadata node llvm.dbg.cu is disallowed +; CHECK: Named metadata node llvm.dbg.cu is disallowed +!llvm.dbg.cu = !{!0} +!0 = metadata !{ half 0.0} + +; CHECK: Named metadata node madeup is disallowed +; DEBUG: Named metadata node madeup is disallowed +!madeup = !{!1} +!1 = metadata !{ half 1.0} diff --git a/test/NaCl/PNaClABI/abi-small-arguments.ll b/test/NaCl/PNaClABI/abi-small-arguments.ll new file mode 100644 index 0000000000..ce698e7d47 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-small-arguments.ll @@ -0,0 +1,52 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +define void @arg_i1(i1 %bad) { + ret void +} +; CHECK: Function arg_i1 has disallowed type: + +define void @arg_i16(i32 %allowed, i16 %bad) { + ret void +} +; CHECK: Function arg_i16 has disallowed type: + +define i1 @return_i1() { + ret i1 0 +} +; CHECK: Function return_i1 has disallowed type: + +define i8 @return_i8() { + ret i8 0 +} +; CHECK: Function return_i8 has disallowed type: + + +define void @bad_direct_calls() { + call void @arg_i1(i1 0) +; CHECK: bad function callee operand: call void @arg_i1 + + call void @arg_i16(i32 0, i16 0) +; CHECK-NEXT: bad function callee operand: call void @arg_i16 + + %result1 = call i1 @return_i1() +; CHECK-NEXT: bad function callee operand: {{.*}} call i1 @return_i1 + + %result2 = call i8 @return_i8() +; CHECK-NEXT: bad function callee operand: {{.*}} call i8 @return_i8 + + ret void +} + +define void @bad_indirect_calls(i32 %ptr) { + %func1 = inttoptr i32 %ptr to void (i8)* +; CHECK: bad result type: %func1 + call void %func1(i8 0) +; CHECK: bad function callee operand: {{.*}} %func1 + + %func2 = inttoptr i32 %ptr to i16 ()* +; CHECK: bad result type: %func2 + %result3 = call i16 %func2() +; CHECK: bad function callee operand: {{.*}} %func2 + + ret void +} diff --git a/test/NaCl/PNaClABI/abi-stripped-pointers.ll b/test/NaCl/PNaClABI/abi-stripped-pointers.ll new file mode 100644 index 0000000000..8f23cbce16 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-stripped-pointers.ll @@ -0,0 +1,134 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; This test checks that the PNaCl ABI verifier enforces the normal +; form introduced by the ReplacePtrsWithInts pass. 
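+; (In that normal form nearly every value is a plain i32: a pointer only
+; appears transiently, e.g. as an inttoptr feeding the load or store that
+; uses it, or as the operand of a ptrtoint taking a global's address, as
+; @allowed_cases below illustrates.)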
+ + +@var = global [4 x i8] c"xxxx" +@ptr = global i32 ptrtoint ([4 x i8]* @var to i32) + +declare i8* @llvm.nacl.read.tp() + + +define internal void @pointer_arg(i8* %arg) { + ret void +} +; CHECK: Function pointer_arg has disallowed type + +define internal i8* @pointer_return() { + unreachable +} +; CHECK-NEXT: Function pointer_return has disallowed type + +define internal void @func() { + ret void +} + +define internal void @func_with_arg(i32 %arg) { + ret void +} + + +define internal void @allowed_cases(i32 %arg) { + inttoptr i32 123 to i8* + + ptrtoint [4 x i8]* @var to i32 + + %alloc = alloca i8 + ptrtoint i8* %alloc to i32 + load i8* %alloc, align 1 + + ; These instructions may use a NormalizedPtr, which may be a global. + load i32* @ptr, align 1 + store i32 123, i32* @ptr, align 1 + cmpxchg i32* @ptr, i32 1, i32 2 seq_cst + atomicrmw add i32* @ptr, i32 3 seq_cst + + ; A NormalizedPtr may be a bitcast. + %ptr_bitcast = bitcast [4 x i8]* @var to i32* + load i32* %ptr_bitcast, align 1 + + ; A NormalizedPtr may be an inttoptr. + %ptr_from_int = inttoptr i32 123 to i32* + load i32* %ptr_from_int, align 1 + + ; Check direct and indirect function calls. + %func_as_int = ptrtoint void ()* @func to i32 + %func_ptr = inttoptr i32 %func_as_int to void ()* + call void %func_ptr() + call void @func() + call void @func_with_arg(i32 123) + + ; Intrinsic calls may return pointers. + %thread_ptr = call i8* @llvm.nacl.read.tp() + ptrtoint i8* %thread_ptr to i32 + + ; Bitcasts between non-pointers are not restricted + bitcast i64 0 to double + bitcast i32 0 to float + + ; ConstantInts and Arguments are allowed as operands. + add i32 %arg, 123 + + ret void +} +; CHECK-NOT: disallowed + + +define internal void @bad_cases() { +entry: + ptrtoint [4 x i8]* @var to i16 +; CHECK: Function bad_cases disallowed: non-i32 ptrtoint + + inttoptr i16 123 to i8* +; CHECK-NEXT: non-i32 inttoptr + + %a = alloca i32 +; CHECK-NEXT: non-i8 alloca: %a + %a2 = alloca [4 x i8] +; CHECK-NEXT: non-i8 alloca: %a2 + + store i32 0, i32* null, align 1 +; CHECK-NEXT: bad pointer + + store i32 0, i32* undef, align 1 +; CHECK-NEXT: bad pointer + + %bc = bitcast i32* @ptr to i31* +; CHECK-NEXT: bad result type + store i31 0, i31* %bc, align 1 +; CHECK-NEXT: bad pointer + + ; Only one level of bitcasts is allowed. + %b = bitcast i32* %a to i8* + %c = bitcast i8* %b to i16* +; CHECK-NEXT: operand not InherentPtr + + br label %block +block: + %phi1 = phi i8* [ undef, %entry ] +; CHECK-NEXT: bad operand: %phi1 + %phi2 = phi i32* [ undef, %entry ] +; CHECK-NEXT: bad operand: %phi2 + + icmp eq i32* @ptr, @ptr +; CHECK-NEXT: bad operand: {{.*}} icmp + icmp eq void ()* @func, @func +; CHECK-NEXT: bad operand: {{.*}} icmp + icmp eq i31 0, 0 +; CHECK-NEXT: bad operand: {{.*}} icmp + + call void null() +; CHECK-NEXT: bad function callee operand + + call void @func_with_arg(i32 ptrtoint (i32* @ptr to i32)) +; CHECK-NEXT: bad operand + + ; Taking the address of an intrinsic is not allowed. + ptrtoint i8* ()* @llvm.nacl.read.tp to i32 +; CHECK-NEXT: operand not InherentPtr + + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-switch.ll b/test/NaCl/PNaClABI/abi-switch.ll new file mode 100644 index 0000000000..c545f4e1f7 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-switch.ll @@ -0,0 +1,31 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +@var = internal global [4 x i8] c"xxxx" + + +; CHECK-NOT: disallowed + +define internal void @bad_cases() { + ; ConstantExprs should be rejected here. 
+ switch i32 ptrtoint ([4 x i8]* @var to i32), label %next [i32 0, label %next] +; CHECK: disallowed: bad switch condition +next: + + ; Bad integer type. + switch i32 0, label %next [i99 0, label %next] +; CHECK: bad switch case + + ; Bad integer type. + switch i32 0, label %next [i32 0, label %next + i99 1, label %next] +; CHECK: bad switch case + + ; Note that the reader only allows ConstantInts in the label list. + ; We don't need to check the following, because the reader rejects + ; it: + ; switch i32 0, label %next [i32 ptrtoint (i32* @ptr to i32), label %next] + + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-varargs.ll b/test/NaCl/PNaClABI/abi-varargs.ll new file mode 100644 index 0000000000..dac94e00e0 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-varargs.ll @@ -0,0 +1,13 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +define void @varargs_func(i32 %arg, ...) { + ret void +} +; CHECK: Function varargs_func has disallowed type: void (i32, ...) + +define void @call_varargs_func(i32 %ptr) { + %ptr2 = inttoptr i32 %ptr to void (i32, ...)* + call void (i32, ...)* %ptr2(i32 123) + ret void +} +; CHECK: Function call_varargs_func disallowed: bad function callee operand: call void (i32, ...)* diff --git a/test/NaCl/PNaClABI/abi-visibility.ll b/test/NaCl/PNaClABI/abi-visibility.ll new file mode 100644 index 0000000000..1c54b248b4 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-visibility.ll @@ -0,0 +1,15 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; Disallow the visibility attributes set by +; __attribute__((visibility("hidden"))) and +; __attribute__((visibility("protected"))). + +define internal hidden void @visibility_hidden() { + ret void +} +; CHECK: Function visibility_hidden has disallowed visibility: hidden + +define internal protected void @visibility_protected() { + ret void +} +; CHECK-NEXT: Function visibility_protected has disallowed visibility: protected diff --git a/test/NaCl/PNaClABI/global-attributes.ll b/test/NaCl/PNaClABI/global-attributes.ll new file mode 100644 index 0000000000..45ed9c2fb6 --- /dev/null +++ b/test/NaCl/PNaClABI/global-attributes.ll @@ -0,0 +1,62 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +; Global variable attributes + +; CHECK: Variable var_with_section has disallowed "section" attribute +@var_with_section = internal global [1 x i8] zeroinitializer, section ".some_section" + +; PNaCl programs can depend on data alignments in general, so we allow +; "align" on global variables. +; CHECK-NOT: var_with_alignment +@var_with_alignment = internal global [4 x i8] zeroinitializer, align 8 + +; TLS variables must be expanded out by ExpandTls. 
+; CHECK-NEXT: Variable tls_var has disallowed "thread_local" attribute +@tls_var = internal thread_local global [4 x i8] zeroinitializer + +; CHECK-NEXT: Variable var_with_unnamed_addr has disallowed "unnamed_addr" attribute +@var_with_unnamed_addr = internal unnamed_addr constant [1 x i8] c"x" + +; CHECK-NEXT: Variable var_ext_init has disallowed "externally_initialized" attribute +@var_ext_init = internal externally_initialized global [1 x i8] c"x" + + +; Function attributes + +; CHECK-NEXT: Function func_with_attrs has disallowed attributes: noreturn nounwind +define internal void @func_with_attrs() noreturn nounwind { + ret void +} + +; CHECK-NEXT: Function func_with_arg_attrs has disallowed attributes: inreg zeroext +define internal void @func_with_arg_attrs(i32 inreg zeroext) { + ret void +} + +; CHECK-NEXT: Function func_with_callingconv has disallowed calling convention: 8 +define internal fastcc void @func_with_callingconv() { + ret void +} + +; CHECK-NEXT: Function func_with_section has disallowed "section" attribute +define internal void @func_with_section() section ".some_section" { + ret void +} + +; CHECK-NEXT: Function func_with_alignment has disallowed "align" attribute +define internal void @func_with_alignment() align 1 { + ret void +} + +; CHECK-NEXT: Function func_with_gc has disallowed "gc" attribute +define internal void @func_with_gc() gc "my_gc_func" { + ret void +} + +; CHECK-NEXT: Function func_with_unnamed_addr has disallowed "unnamed_addr" attribute +define internal void @func_with_unnamed_addr() unnamed_addr { + ret void +} + +; CHECK-NOT: disallowed +; If another check is added, there should be a check-not in between each check diff --git a/test/NaCl/PNaClABI/instcombine.ll b/test/NaCl/PNaClABI/instcombine.ll new file mode 100644 index 0000000000..e21eea45c4 --- /dev/null +++ b/test/NaCl/PNaClABI/instcombine.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s +; Test that instcombine does not introduce non-power-of-two integers into +; the module + +target datalayout = "p:32:32:32" + +; This test is a counterpart to icmp_shl16 in +; test/Transforms/InstCombine/icmp.ll, which should still pass. +; CHECK: @icmp_shl31 +; CHECK-NOT: i31 +define i1 @icmp_shl31(i32 %x) { + %shl = shl i32 %x, 1 + %cmp = icmp slt i32 %shl, 36 + ret i1 %cmp +} + +; Check that we don't introduce i4, which is a power of 2 but still not allowed. 
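+; (PNaCl's stable ABI only permits the integer widths i1, i8, i16, i32 and
+; i64, so even a power-of-two width such as i4 must not be introduced.)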
+; CHECK: @icmp_shl4 +; CHECK-NOT: i4 +define i1 @icmp_shl4(i32 %x) { + %shl = shl i32 %x, 28 + %cmp = icmp slt i32 %shl, 1073741824 + ret i1 %cmp +} diff --git a/test/NaCl/PNaClABI/instructions.ll b/test/NaCl/PNaClABI/instructions.ll new file mode 100644 index 0000000000..eb659cbffb --- /dev/null +++ b/test/NaCl/PNaClABI/instructions.ll @@ -0,0 +1,163 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; Test instruction opcodes allowed by PNaCl ABI + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" +target triple = "le32-unknown-nacl" + +define internal void @terminators() { +; Terminator instructions +terminators: + ret void + br i1 0, label %next2, label %next +next: + switch i32 1, label %next2 [i32 0, label %next] +next2: + unreachable +; CHECK-NOT: disallowed +; CHECK: Function terminators disallowed: bad instruction opcode: indirectbr + indirectbr i8* undef, [label %next, label %next2] +} + +define internal void @binops() { +; Binary operations + %a1 = add i32 0, 0 + %a2 = sub i32 0, 0 + %a3 = fsub float 0.0, 0.0 + %a4 = mul i32 0, 0 + %a5 = fmul float 0.0, 0.0 + %a6 = udiv i32 0, 1 + %a7 = sdiv i32 0, 1 + %a8 = fdiv float 0.0, 1.0 + %a9 = urem i32 0, 1 + %a10 = srem i32 0, 1 + %a11 = frem float 0.0, 1.0 +; Bitwise binary operations + %a12 = shl i32 1, 1 + %a13 = lshr i32 1, 1 + %a14 = ashr i32 1, 1 + %a15 = and i32 1, 1 + %a16 = or i32 1, 1 + %a17 = xor i32 1, 1 + ret void +} + +define internal void @vectors() { +; CHECK-NOT: disallowed + +; CHECK: disallowed: bad instruction opcode: {{.*}} extractelement + %a1 = extractelement <2 x i32> <i32 0, i32 0>, i32 0 + +; CHECK: disallowed: bad instruction opcode: {{.*}} shufflevector + %a2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> undef + +; CHECK: disallowed: bad instruction opcode: {{.*}} insertelement + %a3 = insertelement <2 x i32> undef, i32 1, i32 0 + + ret void +} + +define internal void @aggregates() { +; CHECK-NOT: disallowed + +; Aggregate operations + %a1 = extractvalue { i32, i32 } { i32 0, i32 0 }, 0 +; CHECK: disallowed: bad instruction opcode: {{.*}} extractvalue + + %a2 = insertvalue {i32, float} undef, i32 1, 0 +; CHECK-NEXT: disallowed: bad instruction opcode: {{.*}} insertvalue + + ret void +} + +define internal void @memory() { +; Memory operations + %a1 = alloca i8, i32 4 + %ptr = inttoptr i32 0 to i32* + %a2 = load i32* %ptr, align 1 + store i32 undef, i32* %ptr, align 1 + fence acq_rel + %a3 = cmpxchg i32* %ptr, i32 undef, i32 undef acq_rel + %a4 = atomicrmw add i32* %ptr, i32 1 acquire +; CHECK-NOT: disallowed +; CHECK: disallowed: bad instruction opcode: {{.*}} getelementptr + %a5 = getelementptr { i32, i32}* undef + ret void +} + +define internal void @conversion() { +; Conversion operations + %a1 = trunc i32 undef to i8 + %a2 = zext i8 undef to i32 + %a3 = sext i8 undef to i32 + %a4 = fptrunc double undef to float + %a5 = fpext float undef to double + %a6 = fptoui double undef to i64 + %a7 = fptosi double undef to i64 + %a8 = uitofp i64 undef to double + %a9 = sitofp i64 undef to double + ret void +} + +define internal void @other() { +entry: + %a1 = icmp eq i32 undef, undef + %a2 = fcmp oeq float undef, undef + br i1 undef, label %foo, label %bar +foo: +; phi predecessor labels have to match to appease module verifier + %a3 = phi i32 [0, %entry], [0, %foo] + %a4 = select i1 true, i8 undef, i8 undef + call void @conversion() + br i1 undef, label %foo, label %bar +bar: + ret void +} + +define internal void @throwing_func() { + ret void +} 
+define internal void @personality_func() { + ret void +} + +define internal void @invoke_func() { + invoke void @throwing_func() to label %ok unwind label %onerror +; CHECK-NOT: disallowed +; CHECK: disallowed: bad instruction opcode: invoke +ok: + ret void +onerror: + %lp = landingpad i32 + personality i8* bitcast (void ()* @personality_func to i8*) + catch i32* null +; CHECK: disallowed: bad instruction opcode: {{.*}} landingpad + resume i32 %lp +; CHECK: disallowed: bad instruction opcode: resume +} + +define internal i32 @va_arg(i32 %va_list_as_int) { + %va_list = inttoptr i32 %va_list_as_int to i8* + %val = va_arg i8* %va_list, i32 + ret i32 %val +} +; CHECK-NOT: disallowed +; CHECK: disallowed: bad instruction opcode: {{.*}} va_arg + +@global_var = internal global [4 x i8] zeroinitializer + +define internal void @constantexpr() { + ptrtoint i8* getelementptr ([4 x i8]* @global_var, i32 1, i32 0) to i32 + ret void +} +; CHECK-NOT: disallowed +; CHECK: disallowed: operand not InherentPtr: %1 = ptrtoint i8* getelementptr + +define internal void @inline_asm() { + call void asm "foo", ""() + ret void +} +; CHECK-NOT: disallowed +; CHECK: disallowed: inline assembly: call void asm "foo", ""() + +; CHECK-NOT: disallowed +; If another check is added, there should be a check-not in between each check diff --git a/test/NaCl/PNaClABI/intrinsics.ll b/test/NaCl/PNaClABI/intrinsics.ll new file mode 100644 index 0000000000..7c5e76e795 --- /dev/null +++ b/test/NaCl/PNaClABI/intrinsics.ll @@ -0,0 +1,121 @@ +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics=0 < %s | FileCheck %s +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics=0 \ +; RUN: -pnaclabi-allow-debug-metadata < %s | FileCheck %s --check-prefix=DBG +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics=1 < %s | \ +; RUN: FileCheck %s --check-prefix=DEV + +; Test that only white-listed intrinsics are allowed. + +; =================================== +; Some disallowed "Dev" intrinsics. +; CHECK: Function llvm.dbg.value is a disallowed LLVM intrinsic +; DBG-NOT: Function llvm.dbg.value is a disallowed LLVM intrinsic +; DEV-NOT: Function llvm.dbg.value is a disallowed LLVM intrinsic +declare void @llvm.dbg.value(metadata, i64, metadata) + +; =================================== +; Always allowed intrinsics. + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, + i32 %len, i32 %align, i1 %isvolatile) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, + i32 %len, i32 %align, i1 %isvolatile) +declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, + i32 %len, i32 %align, i1 %isvolatile) + +declare i8* @llvm.nacl.read.tp() + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1) + +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) + +declare i32 @llvm.ctpop.i32(i32) +declare i64 @llvm.ctpop.i64(i64) + +declare void @llvm.trap() + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +declare void @llvm.nacl.longjmp(i8*, i32) +declare i32 @llvm.nacl.setjmp(i8*) + +; CHECK-NOT: disallowed + +; =================================== +; Always disallowed intrinsics. 
+ +; CHECK: Function llvm.adjust.trampoline is a disallowed LLVM intrinsic +; DBG: Function llvm.adjust.trampoline is a disallowed LLVM intrinsic +; DEV: Function llvm.adjust.trampoline is a disallowed LLVM intrinsic +declare i8* @llvm.adjust.trampoline(i8*) + +; CHECK: Function llvm.init.trampoline is a disallowed LLVM intrinsic +; DBG: Function llvm.init.trampoline is a disallowed LLVM intrinsic +; DEV: Function llvm.init.trampoline is a disallowed LLVM intrinsic +declare void @llvm.init.trampoline(i8*, i8*, i8*) + +; CHECK: Function llvm.x86.aesni.aeskeygenassist is a disallowed LLVM intrinsic +; DBG: Function llvm.x86.aesni.aeskeygenassist is a disallowed LLVM intrinsic +; DEV: Function llvm.x86.aesni.aeskeygenassist is a disallowed LLVM intrinsic +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) + +; CHECK: Function llvm.va_copy is a disallowed LLVM intrinsic +; DBG: Function llvm.va_copy is a disallowed LLVM intrinsic +; DEV: Function llvm.va_copy is a disallowed LLVM intrinsic +declare void @llvm.va_copy(i8*, i8*) + +; CHECK: Function llvm.bswap.i1 is a disallowed LLVM intrinsic +declare i1 @llvm.bswap.i1(i1) + +; CHECK: Function llvm.bswap.i8 is a disallowed LLVM intrinsic +declare i8 @llvm.bswap.i8(i8) + +; CHECK: Function llvm.ctlz.i16 is a disallowed LLVM intrinsic +declare i16 @llvm.ctlz.i16(i16, i1) + +; CHECK: Function llvm.cttz.i16 is a disallowed LLVM intrinsic +declare i16 @llvm.cttz.i16(i16, i1) + +; CHECK: Function llvm.ctpop.i16 is a disallowed LLVM intrinsic +declare i16 @llvm.ctpop.i16(i16) + +; CHECK: Function llvm.lifetime.start is a disallowed LLVM intrinsic +declare void @llvm.lifetime.start(i64, i8* nocapture) + +; CHECK: Function llvm.lifetime.end is a disallowed LLVM intrinsic +declare void @llvm.lifetime.end(i64, i8* nocapture) + +; CHECK: Function llvm.frameaddress is a disallowed LLVM intrinsic +declare i8* @llvm.frameaddress(i32 %level) + +; CHECK: Function llvm.returnaddress is a disallowed LLVM intrinsic +declare i8* @llvm.returnaddress(i32 %level) + +; CHECK: Function llvm.sqrt.fp128 is a disallowed LLVM intrinsic +declare fp128 @llvm.sqrt.fp128(fp128) + +; The variants with 64-bit %len arguments are disallowed. +; CHECK: Function llvm.memcpy.p0i8.p0i8.i64 is a disallowed LLVM intrinsic +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, + i64 %len, i32 %align, i1 %isvolatile) +; CHECK: Function llvm.memmove.p0i8.p0i8.i64 is a disallowed LLVM intrinsic +declare void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, + i64 %len, i32 %align, i1 %isvolatile) +; CHECK: Function llvm.memset.p0i8.i64 is a disallowed LLVM intrinsic +declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, + i64 %len, i32 %align, i1 %isvolatile) + +; Test that the ABI checker checks the full function name. 
+; CHECK: Function llvm.memset.foo is a disallowed LLVM intrinsic
+declare void @llvm.memset.foo(i8* %dest, i8 %val,
+                              i64 %len, i32 %align, i1 %isvolatile)
diff --git a/test/NaCl/PNaClABI/linkagetypes.ll b/test/NaCl/PNaClABI/linkagetypes.ll
new file mode 100644
index 0000000000..fffaadc2ee
--- /dev/null
+++ b/test/NaCl/PNaClABI/linkagetypes.ll
@@ -0,0 +1,84 @@
+; RUN: pnacl-abicheck < %s | FileCheck %s
+; Test linkage types allowed by PNaCl ABI
+
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
+
+
+@gv_internal = internal global [1 x i8] c"x"
+; CHECK-NOT: disallowed
+
+@gv_private = private global [1 x i8] c"x"
+; CHECK: Variable gv_private has disallowed linkage type: private
+@gv_linker_private = linker_private global [1 x i8] c"x"
+; CHECK: Variable gv_linker_private has disallowed linkage type: linker_private
+@gv_linker_private_weak = linker_private_weak global [1 x i8] c"x"
+; CHECK: gv_linker_private_weak has disallowed linkage type: linker_private_weak
+@gv_linkonce = linkonce global [1 x i8] c"x"
+; CHECK: gv_linkonce has disallowed linkage type: linkonce
+@gv_linkonce_odr = linkonce_odr global [1 x i8] c"x"
+; CHECK: gv_linkonce_odr has disallowed linkage type: linkonce_odr
+@gv_linkonce_odr_auto_hide = linkonce_odr_auto_hide global [1 x i8] c"x"
+; CHECK: gv_linkonce_odr_auto_hide has disallowed linkage type: linkonce_odr_auto_hide
+@gv_weak = weak global [1 x i8] c"x"
+; CHECK: gv_weak has disallowed linkage type: weak
+@gv_weak_odr = weak_odr global [1 x i8] c"x"
+; CHECK: gv_weak_odr has disallowed linkage type: weak_odr
+@gv_common = common global [1 x i8] c"x"
+; CHECK: gv_common has disallowed linkage type: common
+@gv_appending = appending global [1 x i8] zeroinitializer
+; CHECK: gv_appending has disallowed linkage type: appending
+@gv_dllimport = dllimport global [1 x i8]
+; CHECK: gv_dllimport has disallowed linkage type: dllimport
+@gv_dllexport = dllexport global [1 x i8] c"x"
+; CHECK: gv_dllexport has disallowed linkage type: dllexport
+@gv_extern_weak = extern_weak global [1 x i8]
+; CHECK: gv_extern_weak has disallowed linkage type: extern_weak
+@gv_available_externally = available_externally global [1 x i8] c"x"
+; CHECK: gv_available_externally has disallowed linkage type: available_externally
+
+
+; CHECK-NOT: disallowed
+; CHECK-NOT: internal_func
+; internal linkage is allowed, and should not appear in error output.
+define internal void @internal_func() { + ret void +} + +; CHECK: Function private_func has disallowed linkage type: private +define private void @private_func() { + ret void +} +; CHECK: Function external_func is declared but not defined (disallowed) +declare external void @external_func() +; CHECK: linkonce_func has disallowed linkage type: linkonce +define linkonce void @linkonce_func() { + ret void +} +; CHECK-NEXT: linkonce_odr_func has disallowed linkage type: linkonce_odr +define linkonce_odr void @linkonce_odr_func() { + ret void +} +; CHECK-NEXT: weak_func has disallowed linkage type: weak +define weak void @weak_func() { + ret void +} +; CHECK-NEXT: weak_odr_func has disallowed linkage type: weak_odr +define weak_odr void @weak_odr_func() { + ret void +} +; CHECK-NEXT: dllimport_func is declared but not defined (disallowed) +; CHECK-NEXT: dllimport_func has disallowed linkage type: dllimport +declare dllimport void @dllimport_func() +; CHECK-NEXT: dllexport_func has disallowed linkage type: dllexport +define dllexport void @dllexport_func() { + ret void +} +; CHECK-NEXT: Function extern_weak_func is declared but not defined (disallowed) +; CHECK-NEXT: Function extern_weak_func has disallowed linkage type: extern_weak +declare extern_weak void @extern_weak_func() + +; CHECK-NEXT: Function avail_ext_func has disallowed linkage type: available_externally +define available_externally void @avail_ext_func() { + ret void +} diff --git a/test/NaCl/PNaClABI/lit.local.cfg b/test/NaCl/PNaClABI/lit.local.cfg new file mode 100644 index 0000000000..c6106e4746 --- /dev/null +++ b/test/NaCl/PNaClABI/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll'] diff --git a/test/NaCl/PNaClABI/module-asm.ll b/test/NaCl/PNaClABI/module-asm.ll new file mode 100644 index 0000000000..aab7c709e8 --- /dev/null +++ b/test/NaCl/PNaClABI/module-asm.ll @@ -0,0 +1,4 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s + +module asm "foo" +; CHECK: Module contains disallowed top-level inline assembly diff --git a/test/NaCl/PNaClABI/types-function.ll b/test/NaCl/PNaClABI/types-function.ll new file mode 100644 index 0000000000..a3fb55b23c --- /dev/null +++ b/test/NaCl/PNaClABI/types-function.ll @@ -0,0 +1,40 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; Test type-checking in function bodies. This test is not intended to verify +; all the rules about the various types, but instead to make sure that types +; stashed in various places in function bodies are caught. + +@a2 = private global i17 zeroinitializer + +; CHECK: Function func has disallowed type: void (i15) +declare void @func(i15 %arg) + +!llvm.foo = !{!0} +!0 = metadata !{ half 0.0} + +define void @types() { +; CHECK: bad result type: {{.*}} fptrunc + %h1 = fptrunc double undef to half + +; CHECK: bad operand: {{.*}} bitcast half + %h2 = bitcast half 0.0 to i16 + +; see below... + %h3 = fadd double 0.0, fpext (half 0.0 to double) + +; CHECK: bad pointer: store + store i32 0, i32* bitcast (i17* @a2 to i32*), align 1 + +; CHECK: bad function callee operand: call void @func(i15 1) + call void @func(i15 1) + +; CHECK: Function types has disallowed instruction metadata: !foo + ret void, !foo !0 +} +; CHECK-NOT: disallowed + + +; TODO: +; the bitcode reader seems to expand some operations inline +; (e.g. 
fpext, sext, uitofp) such that doing something like +; %h3 = fadd double 0.0, fpext (half 0.0 to double) +; means the verifier pass will never see the fpext or its operands diff --git a/test/NaCl/PNaClABI/types.ll b/test/NaCl/PNaClABI/types.ll new file mode 100644 index 0000000000..6b8335504e --- /dev/null +++ b/test/NaCl/PNaClABI/types.ll @@ -0,0 +1,136 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; Test types allowed by PNaCl ABI + + +; CHECK: Function badReturn has disallowed type: half* () +define internal half* @badReturn() { + unreachable +} + +; CHECK: Function badArgType1 has disallowed type: void (half, i32) +define internal void @badArgType1(half %a, i32 %b) { + ret void +} +; CHECK: Function badArgType2 has disallowed type: void (i32, half) +define internal void @badArgType2(i32 %a, half %b) { + ret void +} + + +define internal void @func() { +entry: + br label %block +block: + + ; We test for allowed/disallowed types via phi nodes. This gives us + ; a uniform way to test any type. + + ; Allowed types + + phi i1 [ undef, %entry ] + phi i8 [ undef, %entry ] + phi i16 [ undef, %entry ] + phi i32 [ undef, %entry ] + phi i64 [ undef, %entry ] + phi float [ undef, %entry ] + phi double [ undef, %entry ] +; CHECK-NOT: disallowed + + + ; Disallowed integer types + + phi i4 [ undef, %entry ] +; CHECK: Function func disallowed: bad operand: {{.*}} i4 + + phi i33 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} i33 + + phi i128 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} i128 + + + ; Disallowed floating point types + + phi half [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} half + + phi x86_fp80 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} x86_fp80 + + phi fp128 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} fp128 + + phi ppc_fp128 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} ppc_fp128 + + phi x86_mmx [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} x86_mmx + + + ; Derived types are disallowed too + + phi i32* [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} i32* + + phi [1 x i32] [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} [1 x i32] + + phi { i32, float } [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} { i32, float } + + phi void (i32)* [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} void (i32)* + + phi <{ i8, i32 }> [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} <{ i8, i32 }> + + ; Vector types are disallowed + phi <2 x i32> [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} <2 x i32> + + ret void +} + + +; Named types. With the current implementation, named types are legal +; until they are actually attempted to be used. Might want to fix that. +%struct.s1 = type { half, float} +%struct.s2 = type { i32, i32} + +define internal void @func2() { +entry: + br label %block +block: + + phi %struct.s1 [ undef, %entry ] +; CHECK: disallowed: bad operand: {{.*}} %struct.s1 + + phi %struct.s2 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} %struct.s2 + + ret void +} + + +; Circularities: here to make sure the verifier doesn't crash or assert. + +; This oddity is perfectly legal according to the IR and ABI verifiers. +; Might want to fix that. (good luck initializing one of these, though.) 
+%struct.snake = type { i32, %struct.tail } +%struct.tail = type { %struct.snake, i32 } + +%struct.linked = type { i32, %struct.linked * } + +define internal void @func3() { +entry: + br label %block +block: + + phi %struct.snake [ undef, %entry ] +; CHECK: disallowed: bad operand: {{.*}} %struct.snake + + phi %struct.linked [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} %struct.linked + + ret void +} diff --git a/test/NaCl/X86/intrinsics-bitmanip.ll b/test/NaCl/X86/intrinsics-bitmanip.ll new file mode 100644 index 0000000000..ff20ec9b38 --- /dev/null +++ b/test/NaCl/X86/intrinsics-bitmanip.ll @@ -0,0 +1,74 @@ +; RUN: pnacl-llc -mtriple=i686-unknown-nacl -O0 -filetype=asm %s -o - | \ +; RUN: FileCheck %s --check-prefix=NACL32 +; RUN: pnacl-llc -mtriple=i686-unknown-nacl -filetype=asm %s -o - | \ +; RUN: FileCheck %s --check-prefix=NACL32 +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -O0 -filetype=asm %s -o - | \ +; RUN: FileCheck %s --check-prefix=NACL64 +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -filetype=asm %s -o - | \ +; RUN: FileCheck %s --check-prefix=NACL64 + +; Test that various bit manipulation intrinsics are supported by the +; NaCl X86-32 and X86-64 backends. + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +; NACL32: test_bswap_16 +; NACL32: rolw $8, %{{.*}} +; NACL64: test_bswap_16 +; NACL64: rolw $8, %{{.*}} +define i16 @test_bswap_16(i16 %a) { + %b = call i16 @llvm.bswap.i16(i16 %a) + ret i16 %b +} + +; NACL32: test_bswap_const_16 +; NACL32: movw $-12885, %ax # imm = 0xFFFFFFFFFFFFCDAB +; NACL64: test_bswap_const_16 +; NACL64: movw $-12885, %ax # imm = 0xFFFFFFFFFFFFCDAB +define i16 @test_bswap_const_16() { + ; 0xabcd + %a = call i16 @llvm.bswap.i16(i16 43981) + ret i16 %a +} + +; NACL32: test_bswap_32 +; NACL32: bswapl %eax +; NACL64: test_bswap_32 +; NACL64: bswapl %edi +define i32 @test_bswap_32(i32 %a) { + %b = call i32 @llvm.bswap.i32(i32 %a) + ret i32 %b +} + +; NACL32: test_bswap_const_32 +; NACL32: movl $32492971, %eax # imm = 0x1EFCDAB +; NACL64: test_bswap_const_32 +; NACL64: movl $32492971, %eax # imm = 0x1EFCDAB +define i32 @test_bswap_const_32() { + ; 0xabcdef01 + %a = call i32 @llvm.bswap.i32(i32 2882400001) + ret i32 %a +} + +; NACL32: test_bswap_64 +; NACL32: bswapl %e{{.*}} +; NACL32: bswapl %e{{.*}} +; NACL64: test_bswap_64 +; NACL64: bswapq %rdi +define i64 @test_bswap_64(i64 %a) { + %b = call i64 @llvm.bswap.i64(i64 %a) + ret i64 %b +} + +; NACL32: test_bswap_const_64 +; NACL32: movl $32492971, %eax # imm = 0x1EFCDAB +; NACL32: movl $-1989720797, %edx # imm = 0xFFFFFFFF89674523 +; NACL64: test_bswap_const_64 +; NACL64: movabsq $-8545785751253561941, %rax # imm = 0x8967452301EFCDAB +define i64 @test_bswap_const_64(i64 %a) { + ; 0xabcdef01 23456789 + %b = call i64 @llvm.bswap.i64(i64 12379813738877118345) + ret i64 %b +} diff --git a/test/NaCl/X86/lit.local.cfg b/test/NaCl/X86/lit.local.cfg new file mode 100644 index 0000000000..56bf008595 --- /dev/null +++ b/test/NaCl/X86/lit.local.cfg @@ -0,0 +1,6 @@ +config.suffixes = ['.ll', '.s'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + diff --git a/test/NaCl/X86/nacl-read-tp-intrinsic.ll b/test/NaCl/X86/nacl-read-tp-intrinsic.ll new file mode 100644 index 0000000000..91a957b4c4 --- /dev/null +++ b/test/NaCl/X86/nacl-read-tp-intrinsic.ll @@ -0,0 +1,43 @@ +; RUN: pnacl-llc -mtriple=i386-unknown-nacl -filetype=asm %s -o - \ +; RUN: | FileCheck -check-prefix=X32 %s + +; RUN: 
pnacl-llc -mtriple=i386-unknown-nacl -filetype=asm -mtls-use-call %s -o - \ +; RUN: | FileCheck -check-prefix=USE_CALL %s + +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -filetype=asm %s -o - \ +; RUN: | FileCheck -check-prefix=USE_CALL %s + +; "-mtls-use-call" should not make any difference on x86-64. +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -filetype=asm -mtls-use-call %s -o - \ +; RUN: | FileCheck -check-prefix=USE_CALL %s + + +declare i8* @llvm.nacl.read.tp() + +define i8* @get_thread_pointer() { + %tp = call i8* @llvm.nacl.read.tp() + ret i8* %tp +} + +; X32: get_thread_pointer: +; X32: movl %gs:0, %eax + +; USE_CALL: get_thread_pointer: +; USE_CALL: naclcall __nacl_read_tp + + +; Make sure that we do not generate: +; movl $1000, %eax +; addl %gs:0, %eax +; The x86-32 NaCl validator only accepts %gs with "mov", not with +; "add". Note that we had to use a large immediate to trigger the bug +; and generate the code above. +define i8* @get_thread_pointer_add() { + %tp = call i8* @llvm.nacl.read.tp() + %result = getelementptr i8* %tp, i32 1000 + ret i8* %result +} + +; X32: get_thread_pointer_add: +; X32: movl %gs:0, %eax +; X32: addl $1000, %eax diff --git a/test/NaCl/X86/nacl-setlongjmp-intrinsics.ll b/test/NaCl/X86/nacl-setlongjmp-intrinsics.ll new file mode 100644 index 0000000000..b98297b021 --- /dev/null +++ b/test/NaCl/X86/nacl-setlongjmp-intrinsics.ll @@ -0,0 +1,18 @@ +; RUN: pnacl-llc -mtriple=i386-unknown-nacl -filetype=asm %s -o - \ +; RUN: | FileCheck %s --check-prefix=X86 +; Test that @llvm.nacl.{set|long}jmp intrinsics calls get translated to library +; calls as expected. + +declare i32 @llvm.nacl.setjmp(i8*) +declare void @llvm.nacl.longjmp(i8*, i32) + +define void @foo(i8* %arg) { + %num = call i32 @llvm.nacl.setjmp(i8* %arg) +; X86: naclcall setjmp + + call void @llvm.nacl.longjmp(i8* %arg, i32 %num) +; X86: naclcall longjmp + + ret void +} + diff --git a/test/NaCl/X86/nacl64-addrmodes.ll b/test/NaCl/X86/nacl64-addrmodes.ll new file mode 100644 index 0000000000..9384f1910b --- /dev/null +++ b/test/NaCl/X86/nacl64-addrmodes.ll @@ -0,0 +1,131 @@ +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -filetype=asm %s -O0 -o - \ +; RUN: | FileCheck %s + +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -filetype=asm %s -O2 -o - \ +; RUN: | FileCheck %s + +; Check that we don't try to fold a negative displacement into a memory +; reference +define i16 @negativedisp(i32 %b) { +; CHECK: negativedisp + %a = alloca [1 x i16], align 2 + %add = add nsw i32 1073741824, %b + %arrayidx = getelementptr inbounds [1 x i16]* %a, i32 0, i32 %add +; CHECK-NOT: nacl:-2147483648( + %c = load i16* %arrayidx, align 2 + ret i16 %c +} + +@main.m2 = internal constant [1 x [1 x i32]] [[1 x i32] [i32 -60417067]], align 4 +define i1 @largeconst() nounwind { +; CHECK: largeconst +entry: + %retval = alloca i32, align 4 + %i = alloca i32, align 4 + %j = alloca i32, align 4 + %madat = alloca i32*, align 4 + store i32 0, i32* %retval + store i32 -270770481, i32* %i, align 4 + store i32 -1912319477, i32* %j, align 4 + %0 = load i32* %j, align 4 + %mul = mul nsw i32 %0, 233468377 + %add = add nsw i32 %mul, 689019309 + %1 = load i32* %i, align 4 + %mul1 = mul nsw i32 %1, 947877507 + %add2 = add nsw i32 %mul1, 1574375955 + %arrayidx = getelementptr inbounds [1 x i32]* getelementptr inbounds ([1 x [1 x i32]]* @main.m2, i32 0, i32 0), i32 %add2 + %2 = bitcast [1 x i32]* %arrayidx to i32* + %arrayidx3 = getelementptr inbounds i32* %2, i32 %add + store i32* %arrayidx3, i32** %madat, align 4 +; Ensure the large 
constant doesn't get folded into the load +; CHECK: nacl:(%r15 + %3 = load i32** %madat, align 4 + %4 = load i32* %3, align 4 + %conv = zext i32 %4 to i64 + %5 = load i32* %j, align 4 + %mul4 = mul nsw i32 %5, 233468377 + %add5 = add nsw i32 %mul4, 689019309 + %6 = load i32* %i, align 4 + %mul6 = mul nsw i32 %6, 947877507 + %add7 = add nsw i32 %mul6, 1574375955 + %arrayidx8 = getelementptr inbounds [1 x i32]* getelementptr inbounds ([1 x [1 x i32]]* @main.m2, i32 0, i32 0), i32 %add7 + %7 = bitcast [1 x i32]* %arrayidx8 to i32* + %arrayidx9 = getelementptr inbounds i32* %7, i32 %add5 +; Ensure the large constant doesn't get folded into the load +; CHECK: nacl:(%r15 + %8 = load i32* %arrayidx9, align 4 + %conv10 = zext i32 %8 to i64 + %mul11 = mul nsw i64 3795428823, %conv10 + %9 = load i32* %j, align 4 + %mul12 = mul nsw i32 %9, 233468377 + %add13 = add nsw i32 %mul12, 689019309 + %conv14 = sext i32 %add13 to i64 + %rem = srem i64 %conv14, 4294967295 + %xor = xor i64 2597389499, %rem + %mul15 = mul nsw i64 %xor, 3795428823 + %sub = sub nsw i64 %mul11, %mul15 + %add16 = add nsw i64 %sub, 3829710203 + %mul17 = mul nsw i64 %add16, 2824337475 + %add18 = add nsw i64 %mul17, 2376483023 + %cmp = icmp eq i64 %conv, %add18 + ret i1 %cmp +} + + +@main.array = private unnamed_addr constant [1 x i64] [i64 1438933078946427748], align 8 + +define i1 @largeconst_frameindex() nounwind { +; CHECK: largeconst_frameindex +entry: + %retval = alloca i32, align 4 + %r_Ng = alloca i64, align 8 + %i = alloca i32, align 4 + %adat = alloca i64*, align 4 + %array = alloca [1 x i64], align 8 + store i32 0, i32* %retval + store i32 -270770481, i32* %i, align 4 + %0 = bitcast [1 x i64]* %array to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* bitcast ([1 x i64]* @main.array to i8*), i32 8, i32 8, i1 false) + store i32 -270770481, i32* %i, align 4 + %1 = load i32* %i, align 4 + %mul = mul i32 %1, 947877507 + %add = add i32 %mul, 1574375955 + %2 = bitcast [1 x i64]* %array to i64* + %arrayidx = getelementptr inbounds i64* %2, i32 %add +; Ensure the large constant didn't get folded into the load +; CHECK: nacl:(%r15 + %3 = load i64* %arrayidx, align 8 + %add1 = add i64 %3, -5707596139582126917 + %4 = load i32* %i, align 4 + %mul2 = mul i32 %4, 947877507 + %add3 = add i32 %mul2, 1574375955 + %5 = bitcast [1 x i64]* %array to i64* + %arrayidx4 = getelementptr inbounds i64* %5, i32 %add3 + store i64 %add1, i64* %arrayidx4, align 8 + %6 = load i32* %i, align 4 + %mul5 = mul nsw i32 %6, 947877507 + %add6 = add nsw i32 %mul5, 1574375955 + %arrayidx7 = getelementptr inbounds [1 x i64]* %array, i32 0, i32 %add6 +; CHECK: nacl:(%r15 + %7 = load i64* %arrayidx7, align 8 + %add8 = add i64 %7, -5707596139582126917 + %8 = load i32* %i, align 4 + %mul9 = mul nsw i32 %8, 947877507 + %add10 = add nsw i32 %mul9, 1574375955 + %arrayidx11 = getelementptr inbounds [1 x i64]* %array, i32 0, i32 %add10 + store i64 %add8, i64* %arrayidx11, align 8 + %9 = load i32* %i, align 4 + %mul12 = mul nsw i32 %9, 947877507 + %add13 = add nsw i32 %mul12, 1574375955 + %10 = bitcast [1 x i64]* %array to i64* + %arrayidx14 = getelementptr inbounds i64* %10, i32 %add13 + store i64* %arrayidx14, i64** %adat, align 4 + %11 = load i64** %adat, align 4 + %12 = load i64* %11, align 8 + %mul15 = mul i64 %12, -1731288434922394955 + %add16 = add i64 %mul15, -7745351015538694962 + store i64 %add16, i64* %r_Ng, align 8 + ret i1 0 +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind diff --git 
a/test/NaCl/X86/negative-addend.ll b/test/NaCl/X86/negative-addend.ll new file mode 100644 index 0000000000..e46f091cc0 --- /dev/null +++ b/test/NaCl/X86/negative-addend.ll @@ -0,0 +1,27 @@ +; RUN: pnacl-llc -mtriple=i386-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -r - | FileCheck %s -check-prefix=X8632 +; RUN: pnacl-llc -mtriple=x86_64-unknown-nacl -filetype=obj %s -o - \ +; RUN: | llvm-objdump -r - | FileCheck %s -check-prefix=X8664 + +; Check that "add" works for negative values when used as a +; ConstantExpr in a global variable initializer. +; See: https://code.google.com/p/nativeclient/issues/detail?id=3548 + + +; @spacer and @var end up in the BSS section. +; @spacer is at offset 0. @var is at offset 4096 = 0x1000. + +@spacer = internal global [4096 x i8] zeroinitializer +@var = internal global i32 zeroinitializer + +@negative_offset = internal global i32 add + (i32 ptrtoint (i32* @var to i32), i32 -8) + +; Note that the addend 4294971384 below equals 0x100000ff8, where +; 0xff8 comes from subtracting 8 from the offset of @var. + +; X8632: RELOCATION RECORDS FOR [.data]: +; X8632-NEXT: 0 R_386_32 Unknown + +; X8664: RELOCATION RECORDS FOR [.data]: +; X8664-NEXT: 0 R_X86_64_32 .bss+4294971384 diff --git a/test/Transforms/GlobalOpt/metadata.ll b/test/Transforms/GlobalOpt/metadata.ll index 730e2b0802..366f61f083 100644 --- a/test/Transforms/GlobalOpt/metadata.ll +++ b/test/Transforms/GlobalOpt/metadata.ll @@ -1,4 +1,8 @@ ; RUN: opt -S -globalopt < %s | FileCheck %s +; LOCALMOD: We've changed the heuristic used to detect "main" for the GlobalOpt +; optimization of replacing globals with allocas. Revert this when fixed +; properly upstream (http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-July/063580.html) +; XFAIL: * ; PR6112 - When globalopt does RAUW(@G, %G), the metadata reference should drop ; to null. Function local metadata that references @G from a different function diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll index 81ceef8c41..d8e3be3c94 100644 --- a/test/Transforms/InstCombine/overflow.ll +++ b/test/Transforms/InstCombine/overflow.ll @@ -1,17 +1,24 @@ ; RUN: opt -S -instcombine < %s | FileCheck %s ; <rdar://problem/8558713> +; @LOCALMOD-BEGIN +; PNaCl does not support the with.overflow intrinsics in its stable +; ABI, so these optimizations are disabled. 
+ +; RUN: opt -S -instcombine < %s | FileCheck %s -check-prefix=PNACL +; PNACL-NOT: with.overflow + declare void @throwAnExceptionOrWhatever() ; CHECK: @test1 define i32 @test1(i32 %a, i32 %b) nounwind ssp { entry: -; CHECK-NOT: sext +; C;HECK-NOT: sext %conv = sext i32 %a to i64 %conv2 = sext i32 %b to i64 %add = add nsw i64 %conv2, %conv %add.off = add i64 %add, 2147483648 -; CHECK: llvm.sadd.with.overflow.i32 +; C;HECK: llvm.sadd.with.overflow.i32 %0 = icmp ugt i64 %add.off, 4294967295 br i1 %0, label %if.then, label %if.end @@ -20,9 +27,9 @@ if.then: br label %if.end if.end: -; CHECK-NOT: trunc +; C;HECK-NOT: trunc %conv9 = trunc i64 %add to i32 -; CHECK: ret i32 +; C;HECK: ret i32 ret i32 %conv9 } @@ -86,7 +93,7 @@ entry: %add4 = add nsw i32 %add, 128 %cmp = icmp ugt i32 %add4, 255 br i1 %cmp, label %if.then, label %if.end -; CHECK: llvm.sadd.with.overflow.i8 +; C;HECK: llvm.sadd.with.overflow.i8 if.then: ; preds = %entry tail call void @throwAnExceptionOrWhatever() nounwind unreachable @@ -98,7 +105,7 @@ if.end: ; preds = %entry } ; CHECK: @test5 -; CHECK: llvm.uadd.with.overflow +; C;HECK: llvm.uadd.with.overflow ; CHECK: ret i64 define i64 @test5(i64 %a, i64 %b) nounwind ssp { entry: @@ -109,7 +116,7 @@ entry: } ; CHECK: @test6 -; CHECK: llvm.uadd.with.overflow +; C;HECK: llvm.uadd.with.overflow ; CHECK: ret i64 define i64 @test6(i64 %a, i64 %b) nounwind ssp { entry: @@ -120,7 +127,7 @@ entry: } ; CHECK: @test7 -; CHECK: llvm.uadd.with.overflow +; C;HECK: llvm.uadd.with.overflow ; CHECK: ret i64 define i64 @test7(i64 %a, i64 %b) nounwind ssp { entry: @@ -153,3 +160,5 @@ if.end: %conv9 = trunc i64 %add to i32 ret i32 %conv9 } + +; @LOCALMOD-END diff --git a/test/Transforms/NaCl/add-pnacl-external-decls.ll b/test/Transforms/NaCl/add-pnacl-external-decls.ll new file mode 100644 index 0000000000..1f525a9268 --- /dev/null +++ b/test/Transforms/NaCl/add-pnacl-external-decls.ll @@ -0,0 +1,6 @@ +; RUN: opt < %s -add-pnacl-external-decls -S | FileCheck %s + +declare void @foobar(i32) + +; CHECK: declare i32 @setjmp(i8*) +; CHECK: declare void @longjmp(i8*, i32) diff --git a/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll b/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll new file mode 100644 index 0000000000..9c263fd15e --- /dev/null +++ b/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll @@ -0,0 +1,45 @@ +; RUN: opt %s -canonicalize-mem-intrinsics -S | FileCheck %s +; RUN: opt %s -canonicalize-mem-intrinsics -S \ +; RUN: | FileCheck %s -check-prefix=CLEANED + +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +; CLEANED-NOT: @llvm.mem{{.*}}i64 + + +define void @memset_caller(i8* %dest, i8 %char, i64 %size) { + call void @llvm.memset.p0i8.i64(i8* %dest, i8 %char, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memset_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %char, i32 %mem_len_truncate, i32 1, i1 false) + + +define void @memcpy_caller(i8* %dest, i8* %src, i64 %size) { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memcpy_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %mem_len_truncate, i32 1, i1 false) + + +define void @memmove_caller(i8* %dest, i8* %src, i64 %size) { + call void 
@llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memmove_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %mem_len_truncate, i32 1, i1 false) + + +; Check that constant sizes remain as constants. + +define void @memset_caller_const(i8* %dest, i8 %char) { + call void @llvm.memset.p0i8.i64(i8* %dest, i8 %char, i64 123, i32 1, i1 0) + ret void +} +; CHECK: define void @memset_caller +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %char, i32 123, i32 1, i1 false) diff --git a/test/Transforms/NaCl/expand-arith-with-overflow.ll b/test/Transforms/NaCl/expand-arith-with-overflow.ll new file mode 100644 index 0000000000..bff6388e11 --- /dev/null +++ b/test/Transforms/NaCl/expand-arith-with-overflow.ll @@ -0,0 +1,85 @@ +; RUN: opt %s -expand-arith-with-overflow -expand-struct-regs -S | FileCheck %s +; RUN: opt %s -expand-arith-with-overflow -expand-struct-regs -S | \ +; RUN: FileCheck %s -check-prefix=CLEANUP + +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) +declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) + +; CLEANUP-NOT: with.overflow +; CLEANUP-NOT: extractvalue +; CLEANUP-NOT: insertvalue + + +define void @umul32_by_const(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 256) + %val = extractvalue {i32, i1} %pair, 0 + %overflow = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} + +; The bound is 16777215 == 0xffffff == ((1 << 32) - 1) / 256 +; CHECK: define void @umul32_by_const( +; CHECK-NEXT: %pair.arith = mul i32 %x, 256 +; CHECK-NEXT: %pair.overflow = icmp ugt i32 %x, 16777215 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +; Check that the pass can expand multiple uses of the same intrinsic. +define void @umul32_by_const2(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 65536) + %val = extractvalue {i32, i1} %pair, 0 + ; Check that the pass can expand multiple uses of %pair. + %overflow1 = extractvalue {i32, i1} %pair, 1 + %overflow2 = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow1, i1* %result_overflow + store i1 %overflow2, i1* %result_overflow + ret void +} + +; CHECK: define void @umul32_by_const2( +; CHECK-NEXT: %pair.arith = mul i32 %x, 65536 +; CHECK-NEXT: %pair.overflow = icmp ugt i32 %x, 65535 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @umul64_by_const(i64 %x, i64* %result_val, i1* %result_overflow) { + ; Multiply by 1 << 55. 
+ %pair = call {i64, i1} @llvm.umul.with.overflow.i64(i64 36028797018963968, i64 %x) + %val = extractvalue {i64, i1} %pair, 0 + %overflow = extractvalue {i64, i1} %pair, 1 + + store i64 %val, i64* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} + +; CHECK: define void @umul64_by_const(i64 %x, i64* %result_val, i1* %result_overflow) { +; CHECK-NEXT: %pair.arith = mul i64 %x, 36028797018963968 +; CHECK-NEXT: %pair.overflow = icmp ugt i64 %x, 511 +; CHECK-NEXT: store i64 %pair.arith, i64* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @uadd16_with_const(i16 %x, i16* %result_val, i1* %result_overflow) { + %pair = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 35) + %val = extractvalue {i16, i1} %pair, 0 + %overflow = extractvalue {i16, i1} %pair, 1 + + store i16 %val, i16* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK: define void @uadd16_with_const(i16 %x, i16* %result_val, i1* %result_overflow) { +; CHECK-NEXT: %pair.arith = add i16 %x, 35 +; CHECK-NEXT: %pair.overflow = icmp ugt i16 %x, -36 +; CHECK-NEXT: store i16 %pair.arith, i16* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow diff --git a/test/Transforms/NaCl/expand-byval.ll b/test/Transforms/NaCl/expand-byval.ll new file mode 100644 index 0000000000..151e36a825 --- /dev/null +++ b/test/Transforms/NaCl/expand-byval.ll @@ -0,0 +1,123 @@ +; RUN: opt -expand-byval %s -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%MyStruct = type { i32, i8, i32 } +%AlignedStruct = type { double, double } + + +; Removal of "byval" attribute for passing structs arguments by value + +declare void @ext_func(%MyStruct*) + +define void @byval_receiver(%MyStruct* byval align 32 %ptr) { + call void @ext_func(%MyStruct* %ptr) + ret void +} +; Strip the "byval" and "align" attributes. 
+; CHECK: define void @byval_receiver(%MyStruct* noalias %ptr) { +; CHECK-NEXT: call void @ext_func(%MyStruct* %ptr) + + +declare void @ext_byval_func(%MyStruct* byval) +; CHECK: declare void @ext_byval_func(%MyStruct* noalias) + +define void @byval_caller(%MyStruct* %ptr) { + call void @ext_byval_func(%MyStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_caller(%MyStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %MyStruct, align 4 +; CHECK: call void @llvm.lifetime.start(i64 12, i8* %{{.*}}) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 12, i32 0, i1 false) +; CHECK-NEXT: call void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) + + +define void @byval_tail_caller(%MyStruct* %ptr) { + tail call void @ext_byval_func(%MyStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_tail_caller(%MyStruct* %ptr) { +; CHECK: {{^}} call void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) + + +define void @byval_invoke(%MyStruct* %ptr) { + invoke void @ext_byval_func(%MyStruct* byval align 32 %ptr) + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK: define void @byval_invoke(%MyStruct* %ptr) { +; CHECK: %ptr.byval_copy = alloca %MyStruct, align 32 +; CHECK: call void @llvm.lifetime.start(i64 12, i8* %{{.*}}) +; CHECK: invoke void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) +; CHECK: cont: +; CHECK: call void @llvm.lifetime.end(i64 12, i8* %{{.*}}) +; CHECK: lpad: +; CHECK: call void @llvm.lifetime.end(i64 12, i8* %{{.*}}) + + +; Check handling of alignment + +; Check that "align" is stripped for declarations too. +declare void @ext_byval_func_align(%MyStruct* byval align 32) +; CHECK: declare void @ext_byval_func_align(%MyStruct* noalias) + +define void @byval_caller_align_via_attr(%MyStruct* %ptr) { + call void @ext_byval_func(%MyStruct* byval align 32 %ptr) + ret void +} +; CHECK: define void @byval_caller_align_via_attr(%MyStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %MyStruct, align 32 +; The memcpy may assume that %ptr is 32-byte-aligned. +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 12, i32 32, i1 false) + +declare void @ext_byval_func_align_via_type(%AlignedStruct* byval) + +; %AlignedStruct contains a double so requires an alignment of 8 bytes. +; Looking at the alignment of %AlignedStruct is a workaround for a bug +; in pnacl-clang: +; https://code.google.com/p/nativeclient/issues/detail?id=3403 +define void @byval_caller_align_via_type(%AlignedStruct* %ptr) { + call void @ext_byval_func_align_via_type(%AlignedStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_caller_align_via_type(%AlignedStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %AlignedStruct, align 8 +; Don't assume that %ptr is 8-byte-aligned when doing the memcpy. 
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 16, i32 0, i1 false) + + +; Removal of "sret" attribute for returning structs by value + +declare void @ext_sret_func(%MyStruct* sret align 32) +; CHECK: declare void @ext_sret_func(%MyStruct*) + +define void @sret_func(%MyStruct* sret align 32 %buf) { + ret void +} +; CHECK: define void @sret_func(%MyStruct* %buf) { + +define void @sret_caller(%MyStruct* %buf) { + call void @ext_sret_func(%MyStruct* sret align 32 %buf) + ret void +} +; CHECK: define void @sret_caller(%MyStruct* %buf) { +; CHECK-NEXT: call void @ext_sret_func(%MyStruct* %buf) + + +; Check that other attributes are preserved + +define void @inreg_attr(%MyStruct* inreg %ptr) { + ret void +} +; CHECK: define void @inreg_attr(%MyStruct* inreg %ptr) { + +declare void @func_attrs() #0 +; CHECK: declare void @func_attrs() #0 + +attributes #0 = { noreturn nounwind } +; CHECK: attributes #0 = { noreturn nounwind } diff --git a/test/Transforms/NaCl/expand-constantexpr.ll b/test/Transforms/NaCl/expand-constantexpr.ll new file mode 100644 index 0000000000..e8786d4cac --- /dev/null +++ b/test/Transforms/NaCl/expand-constantexpr.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -expand-constant-expr -S | FileCheck %s + +@global_var1 = global i32 123 +@global_var2 = global i32 123 + + +define i8* @constantexpr_bitcast() { + ret i8* bitcast (i32* @global_var1 to i8*) +} +; CHECK: @constantexpr_bitcast +; CHECK: %expanded = bitcast i32* @global_var1 to i8* +; CHECK: ret i8* %expanded + + +define i32 @constantexpr_nested() { + ret i32 add (i32 ptrtoint (i32* @global_var1 to i32), + i32 ptrtoint (i32* @global_var2 to i32)) +} +; CHECK: @constantexpr_nested +; CHECK: %expanded1 = ptrtoint i32* @global_var1 to i32 +; CHECK: %expanded2 = ptrtoint i32* @global_var2 to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded2 +; CHECK: ret i32 %expanded + + +define i32 @constantexpr_nested2() { + ret i32 mul (i32 add (i32 ptrtoint (i32* @global_var1 to i32), + i32 ptrtoint (i32* @global_var2 to i32)), i32 2) +} +; CHECK: @constantexpr_nested2 +; CHECK: %expanded2 = ptrtoint i32* @global_var1 to i32 +; CHECK: %expanded3 = ptrtoint i32* @global_var2 to i32 +; CHECK: %expanded1 = add i32 %expanded2, %expanded3 +; CHECK: %expanded = mul i32 %expanded1, 2 +; CHECK: ret i32 %expanded + + +define i32 @constantexpr_phi() { +entry: + br label %label +label: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %entry ] + ret i32 %result +} +; CHECK: @constantexpr_phi +; CHECK: entry: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: br label %label +; CHECK: label: +; CHECK: %result = phi i32 [ %expanded, %entry ] + + +; This tests that ExpandConstantExpr correctly handles a PHI node that +; contains the same ConstantExpr twice. +; Using replaceAllUsesWith() is not correct on a PHI node when the +; new instruction has to be added to an incoming block. 
+define i32 @constantexpr_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %iftrue ], + [ ptrtoint (i32* @global_var1 to i32), %iffalse ] + ret i32 %result +} +; CHECK: @constantexpr_phi_twice +; CHECK: iftrue: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: iffalse: +; CHECK: %expanded1 = ptrtoint i32* @global_var1 to i32 +; CHECK: exit: + + +define i32 @constantexpr_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %entry ], + [ ptrtoint (i32* @global_var1 to i32), %entry ] + ret i32 %result +} +; CHECK: @constantexpr_phi_multiple_entry +; CHECK: entry: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: br i1 %arg, label %done, label %done +; CHECK: done: +; CHECK: %result = phi i32 [ %expanded, %entry ], [ %expanded, %entry ] + + + +declare void @external_func() +declare void @personality_func() + +define void @test_landingpad() { + invoke void @external_func() to label %ok unwind label %onerror +ok: + ret void +onerror: + %lp = landingpad i32 + personality i8* bitcast (void ()* @personality_func to i8*) + catch i32* null + ret void +} +; landingpad can only accept a ConstantExpr, so this should remain +; unmodified. +; CHECK: @test_landingpad +; CHECK: personality i8* bitcast (void ()* @personality_func to i8*) diff --git a/test/Transforms/NaCl/expand-ctors-empty.ll b/test/Transforms/NaCl/expand-ctors-empty.ll new file mode 100644 index 0000000000..f0788a0873 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-empty.ll @@ -0,0 +1,12 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is not present, it is treated as if it is an +; empty array, and __{init,fini}_array_start are defined anyway. + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-emptylist.ll b/test/Transforms/NaCl/expand-ctors-emptylist.ll new file mode 100644 index 0000000000..6ab68852b9 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-emptylist.ll @@ -0,0 +1,13 @@ +; RUN: opt %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end +; NO_CTORS-NOT: llvm.global_ctors + +; RUN: opt %s -nacl-expand-ctors -S | FileCheck %s + +; Check that the pass works when the initializer is "[]", which gets +; converted into "undef" by the reader. +@llvm.global_ctors = appending global [0 x { i32, void ()* }] [] + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-zeroinit.ll b/test/Transforms/NaCl/expand-ctors-zeroinit.ll new file mode 100644 index 0000000000..824b2b23b7 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-zeroinit.ll @@ -0,0 +1,17 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. 
+; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS
+; NO_CTORS-NOT: __init_array_end
+; NO_CTORS-NOT: __fini_array_end
+
+; We expect this symbol to be removed:
+; RUN: opt < %s -nacl-expand-ctors -S | not grep llvm.global_ctors
+
+; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s
+
+; If llvm.global_ctors is zeroinitializer, it should be treated the
+; same as an empty array.
+
+@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer
+
+; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer
+; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer
diff --git a/test/Transforms/NaCl/expand-ctors.ll b/test/Transforms/NaCl/expand-ctors.ll
new file mode 100644
index 0000000000..250abbc1bf
--- /dev/null
+++ b/test/Transforms/NaCl/expand-ctors.ll
@@ -0,0 +1,37 @@
+; We expect these symbol names to be removed:
+; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS
+; NO_CTORS-NOT: llvm.global_ctors
+; NO_CTORS-NOT: __init_array_end
+; NO_CTORS-NOT: __fini_array_end
+
+; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s
+
+@llvm.global_ctors = appending global [3 x { i32, void ()* }]
+  [{ i32, void ()* } { i32 300, void ()* @init_func_A },
+   { i32, void ()* } { i32 100, void ()* @init_func_B },
+   { i32, void ()* } { i32 200, void ()* @init_func_C }]
+
+@__init_array_start = extern_weak global [0 x void ()*]
+@__init_array_end = extern_weak global [0 x void ()*]
+
+; CHECK: @__init_array_start = internal constant [3 x void ()*] [void ()* @init_func_B, void ()* @init_func_C, void ()* @init_func_A]
+; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer
+
+define void @init_func_A() { ret void }
+define void @init_func_B() { ret void }
+define void @init_func_C() { ret void }
+
+define [0 x void ()*]* @get_array_start() {
+  ret [0 x void ()*]* @__init_array_start;
+}
+; CHECK: @get_array_start()
+; CHECK: ret {{.*}} @__init_array_start
+
+define [0 x void ()*]* @get_array_end() {
+  ret [0 x void ()*]* @__init_array_end;
+}
+
+; @get_array_end() is converted to use a GetElementPtr that returns
+; the end of the generated array:
+; CHECK: @get_array_end()
+; CHECK: ret {{.*}} bitcast ([3 x void ()*]* getelementptr inbounds ([3 x void ()*]* @__init_array_start, i32 1)
diff --git a/test/Transforms/NaCl/expand-getelementptr.ll b/test/Transforms/NaCl/expand-getelementptr.ll
new file mode 100644
index 0000000000..9f5a4bd8d2
--- /dev/null
+++ b/test/Transforms/NaCl/expand-getelementptr.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -expand-getelementptr -S | FileCheck %s
+
+target datalayout = "p:32:32:32"
+
+%MyStruct = type { i8, i32, i8 }
+%MyArray = type { [100 x i64] }
+%MyArrayOneByte = type { [100 x i8] }
+
+
+; Test indexing struct field
+define i8* @test_struct_field(%MyStruct* %ptr) {
+  %addr = getelementptr %MyStruct* %ptr, i32 0, i32 2
+  ret i8* %addr
+}
+; CHECK: @test_struct_field
+; CHECK-NEXT: %gep_int = ptrtoint %MyStruct* %ptr to i32
+; CHECK-NEXT: %gep = add i32 %gep_int, 8
+; CHECK-NEXT: %addr = inttoptr i32 %gep to i8*
+; CHECK-NEXT: ret i8* %addr
+
+
+; Test non-constant index into an array
+define i64* @test_array_index(%MyArray* %ptr, i32 %index) {
+  %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i32 %index
+  ret i64* %addr
+}
+; CHECK: @test_array_index
+; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32
+; CHECK-NEXT: %gep_array = mul i32 %index, 8
+; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array
+; CHECK-NEXT: %addr = inttoptr i32
%gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test constant index into an array (as a pointer) +define %MyStruct* @test_ptr_add(%MyStruct* %ptr) { + %addr = getelementptr %MyStruct* %ptr, i32 2 + ret %MyStruct* %addr +} +; CHECK: @test_ptr_add +; CHECK-NEXT: %gep_int = ptrtoint %MyStruct* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 24 +; CHECK-NEXT: %addr = inttoptr i32 %gep to %MyStruct* +; CHECK-NEXT: ret %MyStruct* %addr + + +; Test that additions and multiplications are combined properly +define i64* @test_add_and_index(%MyArray* %ptr, i32 %index) { + %addr = getelementptr %MyArray* %ptr, i32 1, i32 0, i32 %index + ret i64* %addr +} +; CHECK: @test_add_and_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 800 +; CHECK-NEXT: %gep_array = mul i32 %index, 8 +; CHECK-NEXT: %gep1 = add i32 %gep, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep1 to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test that we don't multiply by 1 unnecessarily +define i8* @test_add_and_index_one_byte(%MyArrayOneByte* %ptr, i32 %index) { + %addr = getelementptr %MyArrayOneByte* %ptr, i32 1, i32 0, i32 %index + ret i8* %addr +} +; CHECK: @test_add_and_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArrayOneByte* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 100 +; CHECK-NEXT: %gep1 = add i32 %gep, %index +; CHECK-NEXT: %addr = inttoptr i32 %gep1 to i8* +; CHECK-NEXT: ret i8* %addr + + +; Test >32-bit array index +define i64* @test_array_index64(%MyArray* %ptr, i64 %index) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i64 %index + ret i64* %addr +} +; CHECK: @test_array_index64 +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_trunc = trunc i64 %index to i32 +; CHECK-NEXT: %gep_array = mul i32 %gep_trunc, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test <32-bit array index +define i64* @test_array_index16(%MyArray* %ptr, i16 %index) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i16 %index + ret i64* %addr +} +; CHECK: @test_array_index16 +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_sext = sext i16 %index to i32 +; CHECK-NEXT: %gep_array = mul i32 %gep_sext, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test >32-bit constant array index +define i64* @test_array_index64_const(%MyArray* %ptr) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i64 100 + ret i64* %addr +} +; CHECK: @test_array_index64_const +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 800 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test <32-bit constant array index -- test sign extension +define i64* @test_array_index16_const(%MyArray* %ptr) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i16 -100 + ret i64* %addr +} +; CHECK: @test_array_index16_const +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, -800 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr diff --git a/test/Transforms/NaCl/expand-small-arguments.ll b/test/Transforms/NaCl/expand-small-arguments.ll new file mode 100644 index 0000000000..48a62d80d7 --- /dev/null +++ b/test/Transforms/NaCl/expand-small-arguments.ll @@ -0,0 +1,97 @@ +; RUN: opt %s -expand-small-arguments -S | FileCheck 
%s + +@var = global i8 0 + + +define void @small_arg(i8 %val) { + store i8 %val, i8* @var + ret void +} +; CHECK: define void @small_arg(i32 %val) { +; CHECK-NEXT: %val.arg_trunc = trunc i32 %val to i8 +; CHECK-NEXT: store i8 %val.arg_trunc, i8* @var + + +define i8 @small_result() { + %val = load i8* @var + ret i8 %val +} +; CHECK: define i32 @small_result() { +; CHECK-NEXT: %val = load i8* @var +; CHECK-NEXT: %val.ret_ext = zext i8 %val to i32 +; CHECK-NEXT: ret i32 %val.ret_ext + +define signext i8 @small_result_signext() { + %val = load i8* @var + ret i8 %val +} +; CHECK: define signext i32 @small_result_signext() { +; CHECK-NEXT: %val = load i8* @var +; CHECK-NEXT: %val.ret_ext = sext i8 %val to i32 +; CHECK-NEXT: ret i32 %val.ret_ext + + +define void @call_small_arg() { + call void @small_arg(i8 100) + ret void +} +; CHECK: define void @call_small_arg() { +; CHECK-NEXT: %arg_ext = zext i8 100 to i32 +; CHECK-NEXT: %.arg_cast = bitcast {{.*}} @small_arg +; CHECK-NEXT: call void %.arg_cast(i32 %arg_ext) + +define void @call_small_arg_signext() { + call void @small_arg(i8 signext 100) + ret void +} +; CHECK: define void @call_small_arg_signext() { +; CHECK-NEXT: %arg_ext = sext i8 100 to i32 +; CHECK-NEXT: %.arg_cast = bitcast {{.*}} @small_arg +; CHECK-NEXT: call void %.arg_cast(i32 signext %arg_ext) + + +define void @call_small_result() { + %r = call i8 @small_result() + store i8 %r, i8* @var + ret void +} +; CHECK: define void @call_small_result() { +; CHECK-NEXT: %r.arg_cast = bitcast {{.*}} @small_result +; CHECK-NEXT: %r = call i32 %r.arg_cast() +; CHECK-NEXT: %r.ret_trunc = trunc i32 %r to i8 +; CHECK-NEXT: store i8 %r.ret_trunc, i8* @var + + +; Check that various attributes are preserved. +define i1 @attributes(i8 %arg) nounwind { + %r = tail call fastcc i1 @attributes(i8 %arg) nounwind + ret i1 %r +} +; CHECK: define i32 @attributes(i32 %arg) [[NOUNWIND:#[0-9]+]] { +; CHECK: tail call fastcc i32 {{.*}} [[NOUNWIND]] + + +; These arguments and results should be left alone. +define i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) { + %r = call i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) + ret i64 %r +} +; CHECK: define i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) { +; CHECK-NEXT: %r = call i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) +; CHECK-NEXT: ret i64 %r + + +; Intrinsics must be left alone since the pass cannot change their types. 
+ +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) +; CHECK: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +define void @intrinsic_call(i8* %ptr) { + call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, i32 256, i32 1, i1 0) + ret void +} +; CHECK: define void @intrinsic_call +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, + + +; CHECK: attributes [[NOUNWIND]] = { nounwind } diff --git a/test/Transforms/NaCl/expand-struct-regs.ll b/test/Transforms/NaCl/expand-struct-regs.ll new file mode 100644 index 0000000000..0cc2c6db85 --- /dev/null +++ b/test/Transforms/NaCl/expand-struct-regs.ll @@ -0,0 +1,126 @@ +; RUN: opt %s -expand-struct-regs -S | FileCheck %s +; RUN: opt %s -expand-struct-regs -S | FileCheck %s -check-prefix=CLEANUP + +; These two instructions should not appear in the output: +; CLEANUP-NOT: extractvalue +; CLEANUP-NOT: insertvalue + +%struct = type { i8, i32 } + + +define void @struct_load(%struct* %p, i8* %out0, i32* %out1) { + %val = load %struct* %p + %field0 = extractvalue %struct %val, 0 + %field1 = extractvalue %struct %val, 1 + store i8 %field0, i8* %out0 + store i32 %field1, i32* %out1 + ret void +} +; CHECK: define void @struct_load +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %p, i32 0, i32 0 +; CHECK-NEXT: %val.field{{.*}} = load i8* %val.index{{.*}}, align 1 +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %p, i32 0, i32 1 +; CHECK-NEXT: %val.field{{.*}} = load i32* %val.index{{.*}}, align 1 +; CHECK-NEXT: store i8 %val.field{{.*}}, i8* %out0 +; CHECK-NEXT: store i32 %val.field{{.*}}, i32* %out1 + + +define void @struct_store(%struct* %in_ptr, %struct* %out_ptr) { + %val = load %struct* %in_ptr + store %struct %val, %struct* %out_ptr + ret void +} +; CHECK: define void @struct_store +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %in_ptr, i32 0, i32 0 +; CHECK-NEXT: %val.field{{.*}} = load i8* %val.index{{.*}}, align 1 +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %in_ptr, i32 0, i32 1 +; CHECK-NEXT: %val.field{{.*}} = load i32* %val.index{{.*}}, align 1 +; CHECK-NEXT: %out_ptr.index{{.*}} = getelementptr %struct* %out_ptr, i32 0, i32 0 +; CHECK-NEXT: store i8 %val.field{{.*}}, i8* %out_ptr.index{{.*}}, align 1 +; CHECK-NEXT: %out_ptr.index{{.*}} = getelementptr %struct* %out_ptr, i32 0, i32 1 +; CHECK-NEXT: store i32 %val.field{{.*}}, i32* %out_ptr.index{{.*}}, align 1 + + +; Ensure that the pass works correctly across basic blocks. 
+define void @across_basic_block(%struct* %in_ptr, %struct* %out_ptr) { + %val = load %struct* %in_ptr + br label %bb +bb: + store %struct %val, %struct* %out_ptr + ret void +} +; CHECK: define void @across_basic_block +; CHECK: load +; CHECK: load +; CHECK: bb: +; CHECK: store +; CHECK: store + + +define void @const_struct_store(%struct* %ptr) { + store %struct { i8 99, i32 1234 }, %struct* %ptr + ret void +} +; CHECK: define void @const_struct_store +; CHECK: store i8 99 +; CHECK: store i32 1234 + + +define void @struct_phi_node(%struct* %ptr) { +entry: + %val = load %struct* %ptr + br label %bb +bb: + %phi = phi %struct [ %val, %entry ] + ret void +} +; CHECK: bb: +; CHECK-NEXT: %phi.index{{.*}} = phi i8 [ %val.field{{.*}}, %entry ] +; CHECK-NEXT: %phi.index{{.*}} = phi i32 [ %val.field{{.*}}, %entry ] + + +define void @struct_phi_node_multiple_entry(i1 %arg, %struct* %ptr) { +entry: + %val = load %struct* %ptr + br i1 %arg, label %bb, label %bb +bb: + %phi = phi %struct [ %val, %entry ], [ %val, %entry ] + ret void +} +; CHECK: bb: +; CHECK-NEXT: %phi.index{{.*}} = phi i8 [ %val.field{{.*}}, %entry ], [ %val.field{{.*}}, %entry ] +; CHECK-NEXT: %phi.index{{.*}} = phi i32 [ %val.field{{.*}}, %entry ], [ %val.field{{.*}}, %entry ] + + +define void @struct_select_inst(i1 %cond, %struct* %ptr1, %struct* %ptr2) { + %val1 = load %struct* %ptr1 + %val2 = load %struct* %ptr2 + %select = select i1 %cond, %struct %val1, %struct %val2 + ret void +} +; CHECK: define void @struct_select_inst +; CHECK: %select.index{{.*}} = select i1 %cond, i8 %val1.field{{.*}}, i8 %val2.field{{.*}} +; CHECK-NEXT: %select.index{{.*}} = select i1 %cond, i32 %val1.field{{.*}}, i32 %val2.field{{.*}} + + +define void @insert_and_extract(i8* %out0, i32* %out1) { + %temp = insertvalue %struct undef, i8 100, 0 + %sval = insertvalue %struct %temp, i32 200, 1 + %field0 = extractvalue %struct %sval, 0 + %field1 = extractvalue %struct %sval, 1 + store i8 %field0, i8* %out0 + store i32 %field1, i32* %out1 + ret void +} +; CHECK: define void @insert_and_extract(i8* %out0, i32* %out1) { +; CHECK-NEXT: store i8 100, i8* %out0 +; CHECK-NEXT: store i32 200, i32* %out1 +; CHECK-NEXT: ret void + + +define i32 @extract_from_constant() { + %ev = extractvalue %struct { i8 99, i32 888 }, 1 + ret i32 %ev +} +; CHECK: define i32 @extract_from_constant() { +; CHECK-NEXT: ret i32 888 diff --git a/test/Transforms/NaCl/expand-tls-aligned.ll b/test/Transforms/NaCl/expand-tls-aligned.ll new file mode 100644 index 0000000000..75f03ba306 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-aligned.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +@var = global i32 123 + +; Put this first to check that the pass handles BSS variables last. +@bss_tvar_aligned = thread_local global i32 0, align 64 + +@tvar1 = thread_local global i16 234 +; Test a pointer to check we are getting the right pointer size. +@tvar2 = thread_local global i32* @var +@tvar_aligned = thread_local global i8 99, align 32 + + +; CHECK: %tls_init_template = type <{ i16, [2 x i8], i32*, [24 x i8], i8 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> + +; This struct type must be "packed" because the 31 byte padding here +; is followed by an i32. 
+; CHECK: %tls_bss_template = type <{ [31 x i8], i32, [60 x i8] }> + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i16 234, [2 x i8] zeroinitializer, i32* @var, [24 x i8] zeroinitializer, i8 99 }> + +; CHECK: @__tls_template_alignment = internal constant i32 64 + + +; Create references to __tls_template_* to keep these live, otherwise +; the definition of %tls_struct (which we check for above) is removed +; from the output. + +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} diff --git a/test/Transforms/NaCl/expand-tls-bss.ll b/test/Transforms/NaCl/expand-tls-bss.ll new file mode 100644 index 0000000000..02504611f0 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-bss.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar_bss1 = thread_local global i64 0 +@tvar_bss2 = thread_local global i32 0 + + +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ i64, i32, [4 x i8] }> + + +define i64* @get_tvar_bss1() { + ret i64* @tvar_bss1 +} +; CHECK: define i64* @get_tvar_bss1() +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: ret i64* %field diff --git a/test/Transforms/NaCl/expand-tls-constexpr-alias.ll b/test/Transforms/NaCl/expand-tls-constexpr-alias.ll new file mode 100644 index 0000000000..65daa5eacd --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr-alias.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@real_tvar = thread_local global i32 123 +@tvar_alias = alias i32* @real_tvar +@tvar_alias2 = alias i32* getelementptr (i32* @real_tvar, i32 100) + + +define i32* @get_tvar() { + ret i32* @tvar_alias +} +; CHECK: define i32* @get_tvar() +; CHECK: ret i32* @real_tvar + + +define i32* @get_tvar2() { + ret i32* @tvar_alias2 +} +; CHECK: define i32* @get_tvar2() +; CHECK: %expanded = getelementptr i32* @real_tvar, i32 100 +; CHECK: ret i32* %expanded + + +define i32* @get_tvar3() { + ret i32* getelementptr (i32* @tvar_alias2, i32 100) +} +; CHECK: define i32* @get_tvar3() +; CHECK: %expanded = getelementptr i32* @real_tvar, i32 200 +; CHECK: ret i32* %expanded diff --git a/test/Transforms/NaCl/expand-tls-constexpr.ll b/test/Transforms/NaCl/expand-tls-constexpr.ll new file mode 100644 index 0000000000..b7ab253692 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr.ll @@ -0,0 +1,152 @@ +; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@tvar = thread_local global i32 0 + + +define i32 @test_converting_ptrtoint() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: define i32 @test_converting_ptrtoint() +; CHECK: %expanded = ptrtoint i32* @tvar to i32 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_add() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), i32 4) +} +; CHECK: define i32 @test_converting_add() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, 4 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_multiple_operands() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), + i32 ptrtoint (i32* @tvar to i32)) +} +; CHECK: define i32 @test_converting_multiple_operands() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded1 +; CHECK: ret i32 %expanded + + 
+define i32 @test_allocating_new_var_name(i32 %expanded) { + %result = add i32 %expanded, ptrtoint (i32* @tvar to i32) + ret i32 %result +} +; CHECK: define i32 @test_allocating_new_var_name(i32 %expanded) +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %result = add i32 %expanded, %expanded1 +; CHECK: ret i32 %result + + +define i8* @test_converting_bitcast() { + ret i8* bitcast (i32* @tvar to i8*) +} +; CHECK: define i8* @test_converting_bitcast() +; CHECK: %expanded = bitcast i32* @tvar to i8* +; CHECK: ret i8* %expanded + + +define i32* @test_converting_getelementptr() { + ; Use an index >1 to ensure that "inbounds" is not added automatically. + ret i32* getelementptr (i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr() +; CHECK: %expanded = getelementptr i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +; This is identical to @test_converting_getelementptr(). +; We need to check that both copies of getelementptr are fixed. +define i32* @test_converting_getelementptr_copy() { + ret i32* getelementptr (i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr_copy() +; CHECK: %expanded = getelementptr i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_getelementptr_inbounds() { + ret i32* getelementptr inbounds (i32* @tvar, i32 2) +} +; CHECK: define i32* @test_converting_getelementptr_inbounds() +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_phi(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; The converted ConstantExprs get pushed back into the PHI node's +; incoming block, which might be suboptimal but works in all cases. +; CHECK: define i32* @test_converting_phi(i1 %cmp) +; CHECK: entry: +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +@addr1 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %return) +@addr2 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %else) +define i32* @test_converting_phi_with_indirectbr(i8* %addr) { +entry: + indirectbr i8* %addr, [ label %return, label %else ] + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; CHECK: define i32* @test_converting_phi_with_indirectbr(i8* %addr) +; CHECK: entry: +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1 +; CHECK: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +; This tests that ExpandTlsConstantExpr correctly handles a PHI node +; that contains the same ConstantExpr twice. Using +; replaceAllUsesWith() is not correct on a PHI node when the new +; instruction has to be added to an incoming block. 
+define i32 @test_converting_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32 [ ptrtoint (i32* @tvar to i32), %iftrue ], + [ ptrtoint (i32* @tvar to i32), %iffalse ] + ret i32 %result +} +; CHECK: define i32 @test_converting_phi_twice(i1 %arg) +; CHECK: iftrue: +; CHECK: %expanded{{.*}} = ptrtoint i32* @tvar to i32 +; CHECK: iffalse: +; CHECK: %expanded{{.*}} = ptrtoint i32* @tvar to i32 +; CHECK: exit: +; CHECK: %result = phi i32 [ %expanded1, %iftrue ], [ %expanded, %iffalse ] + + +define i32 @test_converting_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32 [ ptrtoint (i32* @tvar to i32), %entry ], + [ ptrtoint (i32* @tvar to i32), %entry ] + ret i32 %result +} +; CHECK: define i32 @test_converting_phi_multiple_entry(i1 %arg) +; CHECK: %result = phi i32 [ %expanded, %entry ], [ %expanded, %entry ] diff --git a/test/Transforms/NaCl/expand-tls-constexpr2.ll b/test/Transforms/NaCl/expand-tls-constexpr2.ll new file mode 100644 index 0000000000..ca7054961b --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr2.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +@tvar = thread_local global i32 0 + +define i32 @get_tvar() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: %expanded = ptrtoint i32* %field to i32 +; CHECK: ret i32 %expanded diff --git a/test/Transforms/NaCl/expand-tls-phi.ll b/test/Transforms/NaCl/expand-tls-phi.ll new file mode 100644 index 0000000000..4aa0a7a32c --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-phi.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar = thread_local global i32 123 + +define i32* @get_tvar(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ @tvar, %entry ], [ null, %else ] + ret i32* %result +} +; The TLS access gets pushed back into the PHI node's incoming block, +; which might be suboptimal but works in all cases. +; CHECK: define i32* @get_tvar(i1 %cmp) { +; CHECK: entry: +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %field, %entry ], [ null, %else ] + + +; This tests that ExpandTls correctly handles a PHI node that contains +; the same TLS variable twice. Using replaceAllUsesWith() is not +; correct on a PHI node when the new instruction has to be added to an +; incoming block. +define i32* @tls_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32* [ @tvar, %iftrue ], [ @tvar, %iffalse ] + ret i32* %result +} +; CHECK: define i32* @tls_phi_twice(i1 %arg) { +; CHECK: iftrue: +; CHECK: %field{{.*}} = getelementptr %tls_struct* %tls_struct{{.*}}, i32 -1, i32 0, i32 0 +; CHECK: iffalse: +; CHECK: %field{{.*}} = getelementptr %tls_struct* %tls_struct{{.*}}, i32 -1, i32 0, i32 0 +; CHECK: exit: +; CHECK: %result = phi i32* [ %field{{.*}}, %iftrue ], [ %field{{.*}}, %iffalse ] + + +; In this corner case, ExpandTls must expand out @tvar only once, +; otherwise it will produce invalid IR. 
+define i32* @tls_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32* [ @tvar, %entry ], [ @tvar, %entry ] + ret i32* %result +} +; CHECK: define i32* @tls_phi_multiple_entry(i1 %arg) { +; CHECK: %result = phi i32* [ %field, %entry ], [ %field, %entry ] diff --git a/test/Transforms/NaCl/expand-tls.ll b/test/Transforms/NaCl/expand-tls.ll new file mode 100644 index 0000000000..b824ec074e --- /dev/null +++ b/test/Transforms/NaCl/expand-tls.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +; All thread-local variables should be removed +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s -check-prefix=NO_TLS + +; NO_TLS-NOT: thread_local + +@tvar1 = thread_local global i64 123 +@tvar2 = thread_local global i32 456 + + +; CHECK: %tls_init_template = type <{ i64, i32 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ [4 x i8] }> + + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i64 123, i32 456 }> + +; CHECK: @__tls_template_alignment = internal constant i32 8 + + +define i64* @get_tvar1() { + ret i64* @tvar1 +} +; CHECK: define i64* @get_tvar1() +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: ret i64* %field + + +define i32* @get_tvar2() { + ret i32* @tvar2 +} +; Much the same as for get_tvar1. +; CHECK: define i32* @get_tvar2() +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 1 + + +; Check that we define global variables for TLS templates + +@__tls_template_start = external global i8 +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_start() { + ret i8* @__tls_template_start +} +; CHECK: define i8* @get_tls_template_start() +; CHECK: ret i8* bitcast (%tls_init_template* @__tls_template_start to i8*) + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} +; CHECK: define i8* @get_tls_template_tdata_end() +; CHECK: ret i8* bitcast (%tls_init_template* getelementptr inbounds (%tls_init_template* @__tls_template_start, i32 1) to i8*) + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} +; CHECK: define i8* @get_tls_template_end() +; CHECK: ret i8* bitcast (%tls_struct* getelementptr (%tls_struct* bitcast (%tls_init_template* @__tls_template_start to %tls_struct*), i32 1) to i8*) + + +; Check that we expand out the TLS layout intrinsics + +declare i32 @llvm.nacl.tp.tls.offset(i32) +declare i32 @llvm.nacl.tp.tdb.offset(i32) + +define i32 @test_get_tp_tls_offset(i32 %tls_size) { + %offset = call i32 @llvm.nacl.tp.tls.offset(i32 %tls_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. +; CHECK: define i32 @test_get_tp_tls_offset +; CHECK: call i32 @nacl_tp_tls_offset +; NO_TLS-NOT: llvm.nacl.tp.tls.offset + +define i32 @test_get_tp_tdb_offset(i32 %tdb_size) { + %offset = call i32 @llvm.nacl.tp.tdb.offset(i32 %tdb_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. 
+; CHECK: define i32 @test_get_tp_tdb_offset +; CHECK: call i32 @nacl_tp_tdb_offset +; NO_TLS-NOT: llvm.nacl.tp.tdb.offset diff --git a/test/Transforms/NaCl/expand-varargs-attrs.ll b/test/Transforms/NaCl/expand-varargs-attrs.ll new file mode 100644 index 0000000000..d95a572d6b --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-attrs.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +declare i32 @varargs_func(i32 %arg, ...) + + +; Check that attributes such as "byval" are preserved on fixed arguments. + +%MyStruct = type { i64, i64 } + +define void @func_with_arg_attrs(%MyStruct* byval, ...) { + ret void +} +; CHECK: define void @func_with_arg_attrs(%MyStruct* byval, i8* noalias %varargs) { + + +declare void @take_struct_arg(%MyStruct* byval %s, ...) + +define void @call_with_arg_attrs(%MyStruct* %s) { + call void (%MyStruct*, ...)* @take_struct_arg(%MyStruct* byval %s) + ret void +} +; CHECK: define void @call_with_arg_attrs(%MyStruct* %s) { +; CHECK: call void %vararg_func(%MyStruct* byval %s, <{ i32 }>* %vararg_buffer) + + +; The "byval" attribute here should be dropped. +define i32 @pass_struct_via_vararg1(%MyStruct* %s) { + %result = call i32 (i32, ...)* @varargs_func(i32 111, %MyStruct* byval %s) + ret i32 %result +} +; CHECK: define i32 @pass_struct_via_vararg1(%MyStruct* %s) { +; CHECK: %result = call i32 %vararg_func(i32 111, <{ %MyStruct }>* %vararg_buffer) + + +; The "byval" attribute here should be dropped. +define i32 @pass_struct_via_vararg2(%MyStruct* %s) { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i32 2, %MyStruct* byval %s) + ret i32 %result +} +; CHECK: define i32 @pass_struct_via_vararg2(%MyStruct* %s) { +; CHECK: %result = call i32 %vararg_func(i32 111, <{ i32, %MyStruct }>* %vararg_buffer) + + +; Check that return attributes such as "signext" are preserved. +define i32 @call_with_return_attr() { + %result = call signext i32 (i32, ...)* @varargs_func(i32 111, i64 222) + ret i32 %result +} +; CHECK: define i32 @call_with_return_attr() { +; CHECK: %result = call signext i32 %vararg_func(i32 111 + + +; Check that the "readonly" function attribute is preserved. +define i32 @call_readonly() { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i64 222) readonly + ret i32 %result +} +; CHECK: define i32 @call_readonly() { +; CHECK: %result = call i32 %vararg_func(i32 111, {{.*}}) #1 + + +; Check that the "tail" attribute gets removed, because the callee +; reads space alloca'd by the caller. +define i32 @tail_call() { + %result = tail call i32 (i32, ...)* @varargs_func(i32 111, i64 222) + ret i32 %result +} +; CHECK: define i32 @tail_call() { +; CHECK: %result = call i32 %vararg_func(i32 111 + + +; CHECK: attributes #1 = { readonly } diff --git a/test/Transforms/NaCl/expand-varargs-struct.ll b/test/Transforms/NaCl/expand-varargs-struct.ll new file mode 100644 index 0000000000..b96b41875c --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-struct.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +declare i32 @varargs_func(i32 %arg, ...) + + +%MyStruct = type { i64, i64 } + +; Test passing a struct by value. 
+define i32 @varargs_call_struct(%MyStruct* %ptr) { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i64 222, %MyStruct* byval %ptr) + ret i32 %result +} +; CHECK: define i32 @varargs_call_struct(%MyStruct* %ptr) { +; CHECK: %vararg_ptr1 = getelementptr <{ i64, %MyStruct }>* %vararg_buffer, i32 0, i32 1 +; CHECK: %1 = bitcast %MyStruct* %vararg_ptr1 to i8* +; CHECK: %2 = bitcast %MyStruct* %ptr to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 16, i32 1, i1 false) diff --git a/test/Transforms/NaCl/expand-varargs.ll b/test/Transforms/NaCl/expand-varargs.ll new file mode 100644 index 0000000000..56e722a9a8 --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +%va_list = type i8* + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) + +declare i32 @outside_func(i32 %arg, %va_list* %args) + +define i32 @varargs_func(i32 %arg, ...) { + %arglist_alloc = alloca %va_list + %arglist = bitcast %va_list* %arglist_alloc to i8* + + call void @llvm.va_start(i8* %arglist) + %result = call i32 @outside_func(i32 %arg, %va_list* %arglist_alloc) + call void @llvm.va_end(i8* %arglist) + ret i32 %result +} +; CHECK: define i32 @varargs_func(i32 %arg, i8* noalias %varargs) { +; CHECK-NEXT: %arglist_alloc = alloca i8* +; CHECK-NEXT: %arglist = bitcast i8** %arglist_alloc to i8* +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i8** +; CHECK-NEXT: store i8* %varargs, i8** %arglist1 +; CHECK-NEXT: %result = call i32 @outside_func(i32 %arg, i8** %arglist_alloc) +; CHECK-NEXT: ret i32 %result + + +define i32 @varargs_call1() { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i64 222, i32 333) + ret i32 %result +} +; CHECK: define i32 @varargs_call1() { +; CHECK-NEXT: %vararg_buffer = alloca <{ i64, i32 }> +; CHECK-NEXT: %vararg_lifetime_bitcast = bitcast <{ i64, i32 }>* %vararg_buffer to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 12, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: %vararg_ptr = getelementptr <{ i64, i32 }>* %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i64 222, i64* %vararg_ptr +; CHECK-NEXT: %vararg_ptr1 = getelementptr <{ i64, i32 }>* %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 333, i32* %vararg_ptr1 +; CHECK-NEXT: %vararg_func = bitcast i32 (i32, ...)* bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, ...)*) to i32 (i32, <{ i64, i32 }>*)* +; CHECK-NEXT: %result = call i32 %vararg_func(i32 111, <{ i64, i32 }>* %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end(i64 12, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: ret i32 %result + + +; Check that the pass works when there are no variable arguments. +define i32 @call_with_zero_varargs() { + %result = call i32 (i32, ...)* @varargs_func(i32 111) + ret i32 %result +} +; CHECK: define i32 @call_with_zero_varargs() { +; We have a dummy i32 field to deal with buggy programs: +; CHECK-NEXT: %vararg_buffer = alloca <{ i32 }> +; CHECK: %vararg_func = bitcast i32 (i32, ...)* bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, ...)*) to i32 (i32, <{ i32 }>*)* +; CHECK-NEXT: %result = call i32 %vararg_func(i32 111, <{ i32 }>* %vararg_buffer) + + +; Check that "invoke" instructions are expanded out too. 
+define i32 @varargs_invoke() { + %result = invoke i32 (i32, ...)* @varargs_func(i32 111, i64 222) + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 0 +} +; CHECK: @varargs_invoke +; CHECK: %result = invoke i32 %vararg_func(i32 111, <{ i64 }>* %vararg_buffer) +; CHECK-NEXT: to label %cont unwind label %lpad +; CHECK: cont: +; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) +; CHECK: lpad: +; CHECK: call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + + +define void @varargs_multiple_calls() { + %call1 = call i32 (i32, ...)* @varargs_func(i32 11, i64 22, i32 33) + %call2 = call i32 (i32, ...)* @varargs_func(i32 44, i64 55, i32 66) + ret void +} +; CHECK: @varargs_multiple_calls() +; The added allocas should appear at the start of the function. +; CHECK: %vararg_buffer{{.*}} = alloca <{ i64, i32 }> +; CHECK: %vararg_buffer{{.*}} = alloca <{ i64, i32 }> +; CHECK: %call1 = call i32 %vararg_func{{.*}}(i32 11, <{ i64, i32 }>* %vararg_buffer{{.*}}) +; CHECK: %call2 = call i32 %vararg_func{{.*}}(i32 44, <{ i64, i32 }>* %vararg_buffer{{.*}}) + + +define i32 @va_arg_i32(i8* %arglist) { + %result = va_arg i8* %arglist, i32 + ret i32 %result +} +; CHECK: define i32 @va_arg_i32(i8* %arglist) { +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i32** +; CHECK-NEXT: %arglist_current = load i32** %arglist1 +; CHECK-NEXT: %result = load i32* %arglist_current +; CHECK-NEXT: %arglist_next = getelementptr i32* %arglist_current, i32 1 +; CHECK-NEXT: store i32* %arglist_next, i32** %arglist1 +; CHECK-NEXT: ret i32 %result + + +define i64 @va_arg_i64(i8* %arglist) { + %result = va_arg i8* %arglist, i64 + ret i64 %result +} +; CHECK: define i64 @va_arg_i64(i8* %arglist) { +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i64** +; CHECK-NEXT: %arglist_current = load i64** %arglist1 +; CHECK-NEXT: %result = load i64* %arglist_current +; CHECK-NEXT: %arglist_next = getelementptr i64* %arglist_current, i32 1 +; CHECK-NEXT: store i64* %arglist_next, i64** %arglist1 +; CHECK-NEXT: ret i64 %result + + +define void @do_va_copy(i8* %dest, i8* %src) { + call void @llvm.va_copy(i8* %dest, i8* %src) + ret void +} +; CHECK: define void @do_va_copy(i8* %dest, i8* %src) { +; CHECK-NEXT: %vacopy_src = bitcast i8* %src to i8** +; CHECK-NEXT: %vacopy_dest = bitcast i8* %dest to i8** +; CHECK-NEXT: %vacopy_currentptr = load i8** %vacopy_src +; CHECK-NEXT: store i8* %vacopy_currentptr, i8** %vacopy_dest +; CHECK-NEXT: ret void diff --git a/test/Transforms/NaCl/flatten-globals.ll b/test/Transforms/NaCl/flatten-globals.ll new file mode 100644 index 0000000000..565eb4edc6 --- /dev/null +++ b/test/Transforms/NaCl/flatten-globals.ll @@ -0,0 +1,200 @@ +; RUN: opt -flatten-globals %s -S | FileCheck %s +; RUN: opt -flatten-globals %s -S | FileCheck %s -check-prefix=CLEANED + +target datalayout = "p:32:32:32" + + +; Check simple cases + +@var_i32 = global i32 258 +; CHECK: @var_i32 = global [4 x i8] c"\02\01\00\00" +; CHECK-CLEANED-NOT: global i32 258 + +@external_var = external global i32 +; CHECK: @external_var = external global [4 x i8] + +@zero_init = global i32 0 +; CHECK: @zero_init = global [4 x i8] zeroinitializer + +@big_zero_init = global [2000 x i8] zeroinitializer +; CHECK: @big_zero_init = global [2000 x i8] zeroinitializer + +@null_ptr = global i32* null +; CHECK: @null_ptr = global [4 x i8] zeroinitializer + +@undef_value = global i32 undef +; CHECK: @undef_value = global [4 x i8] 
zeroinitializer + + +; Check various data types + +@var_i1 = global i8 1 +; CHECK: @var_i1 = global [1 x i8] c"\01" + +@var_i8 = global i8 65 +; CHECK: @var_i8 = global [1 x i8] c"A" + +@var_i16 = global i16 258 +; CHECK: @var_i16 = global [2 x i8] c"\02\01" + +@var_i64 = global i64 72623859790382856 +; CHECK: @var_i64 = global [8 x i8] c"\08\07\06\05\04\03\02\01" + +@var_i128 = global i128 1339673755198158349044581307228491536 +; CHECK: @var_i128 = global [16 x i8] c"\10\0F\0E\0D\0C\0B\0A\09\08\07\06\05\04\03\02\01" + +; Check that padding bits come out as zero. +@var_i121 = global i121 1339673755198158349044581307228491536 +; CHECK: @var_i121 = global [16 x i8] c"\10\0F\0E\0D\0C\0B\0A\09\08\07\06\05\04\03\02\01" + +@var_double = global double 123.456 +; CHECK: @var_double = global [8 x i8] c"w\BE\9F\1A/\DD^@" + +@var_float = global float 123.0 +; CHECK: @var_float = global [4 x i8] c"\00\00\F6B" + + +; Check aggregates + +@padded_struct = global { i8, i8, i32 } { i8 65, i8 66, i32 258 } +; CHECK: @padded_struct = global [8 x i8] c"AB\00\00\02\01\00\00" + +@packed_struct = global <{ i8, i8, i32 }> <{ i8 67, i8 68, i32 258 }> +; CHECK: @packed_struct = global [6 x i8] c"CD\02\01\00\00" + +@i8_array = global [6 x i8] c"Hello\00" +; CHECK: @i8_array = global [6 x i8] c"Hello\00" + +@i16_array = global [3 x i16] [ i16 1, i16 2, i16 3 ] +; CHECK: @i16_array = global [6 x i8] c"\01\00\02\00\03\00" + +%s = type { i8, i8 } +@struct_array = global [2 x %s] [%s { i8 1, i8 2 }, %s { i8 3, i8 4 }] +; CHECK: @struct_array = global [4 x i8] c"\01\02\03\04" + +@vector = global <2 x i32> <i32 259, i32 520> +; CHECK: @vector = global [8 x i8] c"\03\01\00\00\08\02\00\00" + + +; Check that various attributes are preserved + +@constant_var = constant i32 259 +; CHECK: @constant_var = constant [4 x i8] c"\03\01\00\00" + +@weak_external_var = extern_weak global i32 +; CHECK: @weak_external_var = extern_weak global [4 x i8] + +@tls_var = external thread_local global i32 +; CHECK: @tls_var = external thread_local global [4 x i8] + +@aligned_var = global i32 260, align 8 +; CHECK: @aligned_var = global [4 x i8] c"\04\01\00\00", align 8 + + +; Check alignment handling + +@implicit_alignment_i32 = global i32 zeroinitializer +; CHECK: @implicit_alignment_i32 = global [4 x i8] zeroinitializer, align 4 + +@implicit_alignment_double = global double zeroinitializer +; CHECK: @implicit_alignment_double = global [8 x i8] zeroinitializer, align 8 + +; FlattenGlobals is not allowed to increase the alignment of the +; variable when an explicit section is specified (although PNaCl does +; not support this attribute). +@lower_alignment_section = global i32 0, section "mysection", align 1 +; CHECK: @lower_alignment_section = global [4 x i8] zeroinitializer, section "mysection", align 1 + +; FlattenGlobals could increase the alignment when no section is +; specified, but it does not. 
+@lower_alignment = global i32 0, align 1 +; CHECK: @lower_alignment = global [4 x i8] zeroinitializer, align 1 + + +; Check handling of global references + +@var1 = external global i32 +@var2 = external global i8 + +%ptrs1 = type { i32*, i8*, i32 } +@ptrs1 = global %ptrs1 { i32* @var1, i8* null, i32 259 } +; CHECK: @ptrs1 = global <{ i32, [8 x i8] }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), [8 x i8] c"\00\00\00\00\03\01\00\00" }> + +%ptrs2 = type { i32, i32*, i8* } +@ptrs2 = global %ptrs2 { i32 259, i32* @var1, i8* @var2 } +; CHECK: @ptrs2 = global <{ [4 x i8], i32, i32 }> <{ [4 x i8] c"\03\01\00\00", i32 ptrtoint ([4 x i8]* @var1 to i32), i32 ptrtoint ([1 x i8]* @var2 to i32) }> + +%ptrs3 = type { i32*, [3 x i8], i8* } +@ptrs3 = global %ptrs3 { i32* @var1, [3 x i8] c"foo", i8* @var2 } +; CHECK: @ptrs3 = global <{ i32, [4 x i8], i32 }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), [4 x i8] c"foo\00", i32 ptrtoint ([1 x i8]* @var2 to i32) }> + +@ptr = global i32* @var1 +; CHECK: @ptr = global i32 ptrtoint ([4 x i8]* @var1 to i32) + +@func_ptr = global i32* ()* @get_address +; CHECK: @func_ptr = global i32 ptrtoint (i32* ()* @get_address to i32) + +@block_addr = global i8* blockaddress(@func_with_block, %label) +; CHECK: @block_addr = global i32 ptrtoint (i8* blockaddress(@func_with_block, %label) to i32) + +@vector_reloc = global <2 x i32*> <i32* @var1, i32* @var1> +; CHECK: global <{ i32, i32 }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), i32 ptrtoint ([4 x i8]* @var1 to i32) }> + + +; Global references with addends + +@reloc_addend = global i32* getelementptr (%ptrs1* @ptrs1, i32 0, i32 2) +; CHECK: @reloc_addend = global i32 add (i32 ptrtoint (<{ i32, [8 x i8] }>* @ptrs1 to i32), i32 8) + +@negative_addend = global %ptrs1* getelementptr (%ptrs1* @ptrs1, i32 -1) +; CHECK: @negative_addend = global i32 add (i32 ptrtoint (<{ i32, [8 x i8] }>* @ptrs1 to i32), i32 -12) + +@const_ptr = global i32* getelementptr (%ptrs1* null, i32 0, i32 2) +; CHECK: @const_ptr = global [4 x i8] c"\08\00\00\00" + +@int_to_ptr = global i32* inttoptr (i16 260 to i32*) +; CHECK: @int_to_ptr = global [4 x i8] c"\04\01\00\00" + +; Clang allows "(uintptr_t) &var" as a global initializer, so we +; handle this case. +@ptr_to_int = global i32 ptrtoint (i8* @var2 to i32) +; CHECK: @ptr_to_int = global i32 ptrtoint ([1 x i8]* @var2 to i32) + +; This is handled via Constant folding. The getelementptr is +; converted to an undef when it is created, so the pass does not see a +; getelementptr here. +@undef_gep = global i32* getelementptr (%ptrs1* undef, i32 0, i32 2) +; CHECK: @undef_gep = global [4 x i8] zeroinitializer + +; Adding an offset to a function address isn't useful, but check that +; the pass handles it anyway. +@func_addend = global i8* getelementptr ( + i8* bitcast (void ()* @func_with_block to i8*), i32 123) +; CHECK: @func_addend = global i32 add (i32 ptrtoint (void ()* @func_with_block to i32), i32 123) + +; Similarly, adding an offset to a label address isn't useful, but +; check it anyway. +@block_addend = global i8* getelementptr ( + i8* blockaddress(@func_with_block, %label), i32 100) +; CHECK: @block_addend = global i32 add (i32 ptrtoint (i8* blockaddress(@func_with_block, %label) to i32), i32 100) + + +; Special cases + +; Leave vars with "appending" linkage alone. 
+@appending = appending global [1 x i32*] [i32* @var1] +; CHECK: @appending = appending global [1 x i32*] [i32* bitcast ([4 x i8]* @var1 to i32*)] + + +define i32* @get_address() { + ret i32* @var_i32 +} +; CHECK: define i32* @get_address() { +; CHECK-NEXT: ret i32* bitcast ([4 x i8]* @var_i32 to i32*) + + +define void @func_with_block() { + br label %label +label: + ret void +} diff --git a/test/Transforms/NaCl/globalcleanup.ll b/test/Transforms/NaCl/globalcleanup.ll new file mode 100644 index 0000000000..44e5b45e16 --- /dev/null +++ b/test/Transforms/NaCl/globalcleanup.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -nacl-global-cleanup -S | FileCheck %s +; RUN: opt < %s -nacl-global-cleanup -S | FileCheck -check-prefix=GV %s + +@llvm.compiler.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" +@llvm.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" + +; GV-NOT: llvm.used +; GV-NOT: llvm.compiler.used + +@extern_weak_const = extern_weak constant i32 +@extern_weak_gv = extern_weak global i32 + +; GV-NOT: @extern_weak_const +; GV-NOT: @extern_weak_gv + +; CHECK: @weak_gv = internal global +@weak_gv = weak global i32 0 + +; CHECK: define void @_start +define void @_start() { + ret void +} + +define i32* @ewgv() { +; CHECK: %bc = getelementptr i8* null, i32 0 + %bc = getelementptr i8* bitcast (i32* @extern_weak_gv to i8*), i32 0 +; CHECK: ret i32* null + ret i32* @extern_weak_gv +} + +define i32* @ewc() { +; CHECK: %bc = getelementptr i8* null, i32 0 + %bc = getelementptr i8* bitcast (i32* @extern_weak_const to i8*), i32 0 +; CHECK: ret i32* null + ret i32* @extern_weak_gv +} + +; GV-NOT: @extern_weak_func +declare extern_weak i32 @extern_weak_func() +; CHECK: @ewf +define i32 @ewf() { +; CHECK: %ret = call i32 null() + %ret = call i32 @extern_weak_func() + ret i32 %ret +} + +; CHECK: define internal void @weak_func +define weak void @weak_func() { + ret void +} diff --git a/test/Transforms/NaCl/lit.local.cfg b/test/Transforms/NaCl/lit.local.cfg new file mode 100644 index 0000000000..a43fd3ebdd --- /dev/null +++ b/test/Transforms/NaCl/lit.local.cfg @@ -0,0 +1,3 @@ +# -*- Python -*- + +config.suffixes = ['.ll'] diff --git a/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll b/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll new file mode 100644 index 0000000000..e6e14b6690 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll @@ -0,0 +1,20 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; Checks that PNaCl ABI pre-opt simplification correctly internalizes +; symbols except _start. + +define void @main() { +; CHECK: define internal void @main + ret void +} + +define external void @foobarbaz() { +; CHECK: define internal void @foobarbaz + ret void +} + +define void @_start() { +; CHECK: define void @_start + ret void +} + diff --git a/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll b/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll new file mode 100644 index 0000000000..87a4f48dd5 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll @@ -0,0 +1,22 @@ +; RUN: opt %s -pnacl-abi-simplify-postopt -S | FileCheck %s +; RUN: opt %s -pnacl-abi-simplify-postopt -S \ +; RUN: | FileCheck %s -check-prefix=CLEANUP + +; "-pnacl-abi-simplify-postopt" runs various passes which are tested +; thoroughly in other *.ll files. This file is a smoke test to check +; that the passes work together OK. 
+ + +@var = global i32 256 +; CHECK: @var = global [4 x i8] + +define i16 @read_var() { + %val = load i16* bitcast (i32* @var to i16*) + ret i16 %val +} +; CHECK: = bitcast [4 x i8]* @var +; CHECK-NEXT: load i16* + +; Check that dead prototypes are successfully removed. +declare void @unused_prototype(i8*) +; CLEANUP-NOT: unused_prototype diff --git a/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll b/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll new file mode 100644 index 0000000000..1cf7377559 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll @@ -0,0 +1,38 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; "-pnacl-abi-simplify-preopt" runs various passes which are tested +; thoroughly in other *.ll files. This file is a smoke test to check +; that "-pnacl-abi-simplify-preopt" runs what it's supposed to run. + +declare void @ext_func() + + +define void @invoke_func() { + invoke void @ext_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK-NOT: invoke void @ext_func() +; CHECK-NOT: landingpad + + +define void @varargs_func(...) { + ret void +} +; CHECK-NOT: @varargs_func(...) + + +@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer +; CHECK-NOT: @llvm.global_ctors + +@tls_var = thread_local global i32 0 +; CHECK-NOT: thread_local + +@alias = alias i32* @tls_var +; CHECK-NOT: @alias + +@weak_ref = extern_weak global i8* +; CHECK-NOT: extern_weak diff --git a/test/Transforms/NaCl/promote-i1-ops.ll b/test/Transforms/NaCl/promote-i1-ops.ll new file mode 100644 index 0000000000..245004b681 --- /dev/null +++ b/test/Transforms/NaCl/promote-i1-ops.ll @@ -0,0 +1,77 @@ +; RUN: opt %s -nacl-promote-i1-ops -S | FileCheck %s + +; Test that the PromoteI1Ops pass expands out i1 loads/stores and i1 +; comparison and arithmetic operations, with the exception of "and", +; "or" and "xor". + + +; i1 loads and stores are converted to i8 load and stores with +; explicit casts. + +define i1 @load(i1* %ptr) { + %val = load i1* %ptr + ret i1 %val +} +; CHECK: define i1 @load +; CHECK-NEXT: %ptr.i8ptr = bitcast i1* %ptr to i8* +; CHECK-NEXT: %val.pre_trunc = load i8* %ptr.i8ptr +; CHECK-NEXT: %val = trunc i8 %val.pre_trunc to i1 + +define void @store(i1 %val, i1* %ptr) { + store i1 %val, i1* %ptr + ret void +} +; CHECK: define void @store +; CHECK-NEXT: %ptr.i8ptr = bitcast i1* %ptr to i8* +; CHECK-NEXT: %val.expand_i1_val = zext i1 %val to i8 +; CHECK-NEXT: store i8 %val.expand_i1_val, i8* %ptr.i8ptr + + +; i1 arithmetic and comparisons are converted to their i8 equivalents +; with explicit casts. + +define i1 @add(i1 %x, i1 %y) { + %result = add i1 %x, %y + ret i1 %result +} +; CHECK: define i1 @add +; CHECK-NEXT: %x.expand_i1_val = zext i1 %x to i8 +; CHECK-NEXT: %y.expand_i1_val = zext i1 %y to i8 +; CHECK-NEXT: %result.pre_trunc = add i8 %x.expand_i1_val, %y.expand_i1_val +; CHECK-NEXT: %result = trunc i8 %result.pre_trunc to i1 + +define i1 @compare(i1 %x, i1 %y) { + %result = icmp slt i1 %x, %y + ret i1 %result +} +; CHECK: define i1 @compare +; CHECK-NEXT: %x.expand_i1_val = sext i1 %x to i8 +; CHECK-NEXT: %y.expand_i1_val = sext i1 %y to i8 +; CHECK-NEXT: %result = icmp slt i8 %x.expand_i1_val, %y.expand_i1_val + + +; Non-shift bitwise operations should not be modified. 
+define void @bitwise_ops(i1 %x, i1 %y) {
+  %and = and i1 %x, %y
+  %or = or i1 %x, %y
+  %xor = xor i1 %x, %y
+  ret void
+}
+; CHECK: define void @bitwise_ops
+; CHECK-NEXT: %and = and i1 %x, %y
+; CHECK-NEXT: %or = or i1 %x, %y
+; CHECK-NEXT: %xor = xor i1 %x, %y
+
+
+define void @unchanged_cases(i32 %x, i32 %y, i32* %ptr) {
+  %add = add i32 %x, %y
+  %cmp = icmp slt i32 %x, %y
+  %val = load i32* %ptr
+  store i32 %x, i32* %ptr
+  ret void
+}
+; CHECK: define void @unchanged_cases
+; CHECK-NEXT: %add = add i32 %x, %y
+; CHECK-NEXT: %cmp = icmp slt i32 %x, %y
+; CHECK-NEXT: %val = load i32* %ptr
+; CHECK-NEXT: store i32 %x, i32* %ptr
diff --git a/test/Transforms/NaCl/promote-integers.ll b/test/Transforms/NaCl/promote-integers.ll
new file mode 100644
index 0000000000..7fca6e1078
--- /dev/null
+++ b/test/Transforms/NaCl/promote-integers.ll
@@ -0,0 +1,374 @@
+; RUN: opt < %s -nacl-promote-ints -S | FileCheck %s
+
+declare void @consume_i16(i16 %a)
+
+; CHECK: @sext_to_illegal
+; CHECK-NEXT: %a40.sext = sext i32 %a to i64
+; CHECK-NEXT: %a40 = and i64 %a40.sext, 1099511627775
+; (0xFFFFFFFFFF)
+define void @sext_to_illegal(i32 %a) {
+  %a40 = sext i32 %a to i40
+  ret void
+}
+
+; CHECK: @sext_from_illegal
+define void @sext_from_illegal(i8 %a) {
+; CHECK: call void @consume_i16(i16 -2)
+  %c12 = sext i12 -2 to i16
+  call void @consume_i16(i16 %c12)
+; CHECK: %a12.sext = sext i8 %a to i16
+; CHECK-NEXT: %a12 = and i16 %a12.sext, 4095
+  %a12 = sext i8 %a to i12
+; CHECK: %a12.getsign = shl i16 %a12, 4
+; CHECK-NEXT: %a16 = ashr i16 %a12.getsign, 4
+  %a16 = sext i12 %a12 to i16
+; CHECK: %a12.getsign1 = shl i16 %a12, 4
+; CHECK-NEXT: %a12.signed = ashr i16 %a12.getsign1, 4
+; CHECK-NEXT: %a14 = and i16 %a12.signed, 16383
+; (0x3FFF)
+  %a14 = sext i12 %a12 to i14
+; CHECK-NEXT: %a12.getsign2 = shl i16 %a12, 4
+; CHECK-NEXT: %a12.signed3 = ashr i16 %a12.getsign2, 4
+; CHECK-NEXT: %a24.sext = sext i16 %a12.signed3 to i32
+; CHECK-NEXT: %a24 = and i32 %a24.sext, 16777215
+; (0xFFFFFF)
+  %a24 = sext i12 %a12 to i24
+
+  %a37 = zext i8 %a to i37
+; CHECK: %a37.getsign = shl i64 %a37, 27
+; CHECK-NEXT: %a64 = ashr i64 %a37.getsign, 27
+  %a64 = sext i37 %a37 to i64
+  ret void
+}
+
+; CHECK: @zext_to_illegal
+define void @zext_to_illegal(i32 %a) {
+; CHECK: zext i32 %a to i64
+; CHECK-NOT: and
+  %a40 = zext i32 %a to i40
+  ret void
+}
+
+; CHECK: @zext_from_illegal
+define void @zext_from_illegal(i8 %a) {
+; get some illegal values to start with
+  %a24 = zext i8 %a to i24
+  %a40 = zext i8 %a to i40
+  %a18 = zext i8 %a to i18
+
+; TODO(dschuff): the ANDs can be no-ops when we zext from an illegal type.
+; CHECK: %a32 = and i32 %a24, 16777215 +; (0xFFFFFF) + %a32 = zext i24 %a24 to i32 + +; CHECK: %b24 = and i32 %a18, 262143 +; (0x3FFFF) + %b24 = zext i18 %a18 to i24 + +; CHECK: %a24.clear = and i32 %a24, 16777215 +; CHECK: %b40 = zext i32 %a24.clear to i64 + %b40 = zext i24 %a24 to i40 + +; CHECK: call void @consume_i16(i16 4094) + %c16 = zext i12 -2 to i16 + call void @consume_i16(i16 %c16) +; CHECK: call void @consume_i16(i16 4094) + %c14 = zext i12 -2 to i14 + %c16.2 = zext i14 %c14 to i16 + call void @consume_i16(i16 %c16.2) + ret void +} + +define void @trunc_from_illegal(i8 %a) { + %a24 = zext i8 %a to i24 +; CHECK: %a16 = trunc i32 %a24 to i16 + %a16 = trunc i24 %a24 to i16 + ret void +} + +define void @trunc_to_illegal(i32 %a) { +; CHECK: %a24 = and i32 %a, 16777215 +; (0xFFFFFF) + %a24 = trunc i32 %a to i24 + +; CHECK: %a24.trunc = trunc i32 %a24 to i16 +; CHECK-NEXT: %a12 = and i16 %a24.trunc, 4095 +; (0xFFF) + %a12 = trunc i24 %a24 to i12 + ret void +} + +; CHECK: @icmpsigned +define void @icmpsigned(i32 %a) { + %shl = trunc i32 %a to i24 +; CHECK: %shl.getsign = shl i32 %shl, 8 +; CHECK-NEXT: %shl.signed = ashr i32 %shl.getsign, 8 +; CHECK-NEXT: %cmp = icmp slt i32 %shl.signed, -2 + %cmp = icmp slt i24 %shl, -2 + ret void +} + +%struct.ints = type { i32, i32 } +; CHECK: @bc1 +; CHECK: bc1 = bitcast i32* %a to i64* +; CHECK-NEXT: bc2 = bitcast i64* %bc1 to i32* +; CHECK-NEXT: bc3 = bitcast %struct.ints* null to i64* +; CHECK-NEXT: bc4 = bitcast i64* %bc1 to %struct.ints* +define i32* @bc1(i32* %a) { + %bc1 = bitcast i32* %a to i40* + %bc2 = bitcast i40* %bc1 to i32* + %bc3 = bitcast %struct.ints* null to i40* + %bc4 = bitcast i40* %bc1 to %struct.ints* + ret i32* %bc2 +} + +; CHECK: zext i32 %a to i64 +; CHECK: and i64 %a40, 255 +define void @and1(i32 %a) { + %a40 = zext i32 %a to i40 + %and = and i40 %a40, 255 + ret void +} + +; CHECK: @andi3 +define void @andi3(i8 %a) { +; CHECK-NEXT: and i8 %a, 7 + %a3 = trunc i8 %a to i3 +; CHECK-NEXT: and i8 %a3, 2 + %and = and i3 %a3, 2 + ret void +} + +; CHECK: @ori7 +define void @ori7(i8 %a, i8 %b) { + %a7 = trunc i8 %a to i7 + %b7 = trunc i8 %b to i7 +; CHECK: %or = or i8 %a7, %b7 + %or = or i7 %a7, %b7 + ret void +} + +; CHECK: @add1 +define void @add1(i16 %a) { +; CHECK-NEXT: %a24.sext = sext i16 %a to i32 +; CHECK-NEXT: %a24 = and i32 %a24.sext, 16777215 + %a24 = sext i16 %a to i24 +; CHECK-NEXT: %sum.result = add i32 %a24, 16777214 +; CHECK-NEXT: %sum = and i32 %sum.result, 16777215 + %sum = add i24 %a24, -2 +; CHECK-NEXT: %sumnsw.result = add nsw i32 %a24, 16777214 +; CHECK-NEXT: %sumnsw = and i32 %sumnsw.result, 16777215 + %sumnsw = add nsw i24 %a24, -2 +; CHECK-NEXT: %sumnuw.result = add nuw i32 %a24, 16777214 +; CHECK-NEXT: %sumnuw = and i32 %sumnuw.result, 16777215 + %sumnuw = add nuw i24 %a24, -2 +; CHECK-NEXT: %sumnw = add nuw nsw i32 %a24, 16777214 +; CHECK-NOT: and + %sumnw = add nuw nsw i24 %a24, -2 + ret void +} + +; CHECK: @shl1 +define void @shl1(i16 %a) { + %a24 = zext i16 %a to i24 +; CHECK: %ashl.result = shl i32 %a24, 5 +; CHECK: %ashl = and i32 %ashl.result, 16777215 + %ashl = shl i24 %a24, 5 + ret void +} + +; CHECK: @shlnuw +define void @shlnuw(i16 %a) { + %a12 = trunc i16 %a to i12 +; CHECK: %ashl = shl nuw i16 %a12, 5 +; CHECK-NOT: and + %ashl = shl nuw i12 %a12, 5 + ret void +} + +; CHECK: @lshr1 +define void @lshr1(i16 %a) { + %a24 = zext i16 %a to i24 +; CHECK: %b = lshr i32 %a24, 20 + %b = lshr i24 %a24, 20 +; CHECK: %c = lshr i32 %a24, 5 + %c = lshr i24 %a24, 5 + ret void +} + +; CHECK: @ashr1 +define 
void @ashr1(i16 %a) { + %a24 = sext i16 %a to i24 +; CHECK: %a24.getsign = shl i32 %a24, 8 +; CHECK-NEXT: %b24.result = ashr i32 %a24.getsign, 19 +; CHECK-NEXT: %b24 = and i32 %b24.result, 16777215 + %b24 = ashr i24 %a24, 11 +; CHECK-NEXT: %a24.getsign1 = shl i32 %a24, 8 +; CHECK-NEXT: %a24.shamt = add i32 %b24, 8 +; CHECK-NEXT: %c.result = ashr i32 %a24.getsign1, %a24.shamt +; CHECK-NEXT: %c = and i32 %c.result, 16777215 + %c = ashr i24 %a24, %b24 + ret void +} + +; CHECK: @phi_icmp +define void @phi_icmp(i32 %a) { +entry: + br label %loop +loop: +; CHECK: %phi40 = phi i64 [ 1099511627774, %entry ], [ %phi40, %loop ] + %phi40 = phi i40 [ -2, %entry ], [ %phi40, %loop ] +; CHECK-NEXT: %b = icmp eq i64 %phi40, 1099511627775 + %b = icmp eq i40 %phi40, -1 +; CHECK-NEXT: br i1 %b, label %loop, label %end + br i1 %b, label %loop, label %end +end: + ret void +} + +; CHECK: @icmp_ult +define void @icmp_ult(i32 %a) { + %a40 = zext i32 %a to i40 +; CHECK: %b = icmp ult i64 %a40, 1099511627774 + %b = icmp ult i40 %a40, -2 + ret void +} + +; CHECK: @select1 +define void @select1(i32 %a) { + %a40 = zext i32 %a to i40 +; CHECK: %s40 = select i1 true, i64 %a40, i64 1099511627775 + %s40 = select i1 true, i40 %a40, i40 -1 + ret void +} + +; CHECK: @alloca40 +; CHECK: %a = alloca i64, align 8 +define void @alloca40() { + %a = alloca i40, align 8 + %b = bitcast i40* %a to i8* + %c = load i8* %b + ret void +} + +; CHECK: @load24 +; CHECK: %bc.loty = bitcast i32* %bc to i16* +; CHECK-NEXT: %load.lo = load i16* %bc.loty +; CHECK-NEXT: %load.lo.ext = zext i16 %load.lo to i32 +; CHECK-NEXT: %bc.hi = getelementptr i16* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i16* %bc.hi to i8* +; CHECK-NEXT: %load.hi = load i8* %bc.hity +; CHECK-NEXT: %load.hi.ext = zext i8 %load.hi to i32 +; CHECK-NEXT: %load.hi.ext.sh = shl i32 %load.hi.ext, 16 +; CHECK-NEXT: %load = or i32 %load.lo.ext, %load.hi.ext.sh +define void @load24(i8* %a) { + %bc = bitcast i8* %a to i24* + %load = load i24* %bc, align 8 + ret void +} + +; CHECK: @load48 +; CHECK: %bc.loty = bitcast i64* %bc to i32* +; CHECK-NEXT: %load.lo = load i32* %bc.loty +; CHECK-NEXT: %load.lo.ext = zext i32 %load.lo to i64 +; CHECK-NEXT: %bc.hi = getelementptr i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %load.hi = load i16* %bc.hity +; CHECK-NEXT: %load.hi.ext = zext i16 %load.hi to i64 +; CHECK-NEXT: %load.hi.ext.sh = shl i64 %load.hi.ext, 32 +; CHECK-NEXT: %load = or i64 %load.lo.ext, %load.hi.ext.sh +define void @load48(i32* %a) { + %bc = bitcast i32* %a to i48* + %load = load i48* %bc, align 8 + ret void +} + +; CHECK: %bc = bitcast i32* %a to i64* +; CHECK-NEXT: %bc.loty = bitcast i64* %bc to i32* +; CHECK-NEXT: %load.lo = load i32* %bc.loty +; CHECK-NEXT: %load.lo.ext = zext i32 %load.lo to i64 +; CHECK-NEXT: %bc.hi = getelementptr i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity.loty = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %load.hi.lo = load i16* %bc.hity.loty +; CHECK-NEXT: %load.hi.lo.ext = zext i16 %load.hi.lo to i32 +; CHECK-NEXT: %bc.hity.hi = getelementptr i16* %bc.hity.loty, i32 1 +; CHECK-NEXT: %bc.hity.hity = bitcast i16* %bc.hity.hi to i8* +; CHECK-NEXT: %load.hi.hi = load i8* %bc.hity.hity +; CHECK-NEXT: %load.hi.hi.ext = zext i8 %load.hi.hi to i32 +; CHECK-NEXT: %load.hi.hi.ext.sh = shl i32 %load.hi.hi.ext, 16 +; CHECK-NEXT: %load.hi = or i32 %load.hi.lo.ext, %load.hi.hi.ext.sh +; CHECK-NEXT: %load.hi.ext = zext i32 %load.hi to i64 +; CHECK-NEXT: %load.hi.ext.sh = shl i64 %load.hi.ext, 32 +; CHECK-NEXT: 
%load = or i64 %load.lo.ext, %load.hi.ext.sh +define void @load56(i32* %a) { + %bc = bitcast i32* %a to i56* + %load = load i56* %bc + ret void +} + +; CHECK: @store24 +; CHECK: %b24 = zext i8 %b to i32 +; CHECK-NEXT: %bc.loty = bitcast i32* %bc to i16* +; CHECK-NEXT: %b24.lo = trunc i32 %b24 to i16 +; CHECK-NEXT: store i16 %b24.lo, i16* %bc.loty +; CHECK-NEXT: %b24.hi.sh = lshr i32 %b24, 16 +; CHECK-NEXT: %bc.hi = getelementptr i16* %bc.loty, i32 1 +; CHECK-NEXT: %b24.hi = trunc i32 %b24.hi.sh to i8 +; CHECK-NEXT: %bc.hity = bitcast i16* %bc.hi to i8* +; CHECK-NEXT: store i8 %b24.hi, i8* %bc.hity +define void @store24(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i24* + %b24 = zext i8 %b to i24 + store i24 %b24, i24* %bc + ret void +} + +; CHECK: @store56 +; CHECK: %b56 = zext i8 %b to i64 +; CHECK-NEXT: %bc.loty = bitcast i64* %bc to i32* +; CHECK-NEXT: %b56.lo = trunc i64 %b56 to i32 +; CHECK-NEXT: store i32 %b56.lo, i32* %bc.loty +; CHECK-NEXT: %b56.hi.sh = lshr i64 %b56, 32 +; CHECK-NEXT: %bc.hi = getelementptr i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity.loty = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %b56.hi.sh.lo = trunc i64 %b56.hi.sh to i16 +; CHECK-NEXT: store i16 %b56.hi.sh.lo, i16* %bc.hity.loty +; CHECK-NEXT: %b56.hi.sh.hi.sh = lshr i64 %b56.hi.sh, 16 +; CHECK-NEXT: %bc.hity.hi = getelementptr i16* %bc.hity.loty, i32 1 +; CHECK-NEXT: %b56.hi.sh.hi = trunc i64 %b56.hi.sh.hi.sh to i8 +; CHECK-NEXT: %bc.hity.hity = bitcast i16* %bc.hity.hi to i8* +; CHECK-NEXT: store i8 %b56.hi.sh.hi, i8* %bc.hity.hity +define void @store56(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i56* + %b56 = zext i8 %b to i56 + store i56 %b56, i56* %bc + ret void +} + +; CHECK: @undefoperand +; CHECK-NEXT: %a40 = zext i32 %a to i64 +; CHECK-NEXT: %au = and i64 %a40, undef +define void @undefoperand(i32 %a) { + %a40 = zext i32 %a to i40 + %au = and i40 %a40, undef + ret void +} + +; CHECK: @switch +; CHECK-NEXT: %a24 = zext i16 %a to i32 +; CHECK-NEXT: switch i32 %a24, label %end [ +; CHECK-NEXT: i32 0, label %if1 +; CHECK-NEXT: i32 1, label %if2 +define void @switch(i16 %a) { + %a24 = zext i16 %a to i24 + switch i24 %a24, label %end [ + i24 0, label %if1 + i24 1, label %if2 + ] +if1: + ret void +if2: + ret void +end: + ret void +} diff --git a/test/Transforms/NaCl/replace-ptrs-with-ints.ll b/test/Transforms/NaCl/replace-ptrs-with-ints.ll new file mode 100644 index 0000000000..7761661798 --- /dev/null +++ b/test/Transforms/NaCl/replace-ptrs-with-ints.ll @@ -0,0 +1,639 @@ +; RUN: opt %s -replace-ptrs-with-ints -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +%struct = type { i32, i32 } + +declare %struct* @addr_taken_func(%struct*) + +@addr_of_func = global %struct* (%struct*)* @addr_taken_func +; CHECK: @addr_of_func = global %struct* (%struct*)* bitcast (i32 (i32)* @addr_taken_func to %struct* (%struct*)*) + +@blockaddr = global i8* blockaddress(@indirectbr, %l1) +; CHECK: @blockaddr = global i8* blockaddress(@indirectbr, %l1) + + +define i8* @pointer_arg(i8* %ptr, i64 %non_ptr) { + ret i8* %ptr +} +; CHECK: define i32 @pointer_arg(i32 %ptr, i64 %non_ptr) { +; CHECK-NEXT: ret i32 %ptr +; CHECK-NEXT: } + + +declare i8* @declared_func(i8*, i64) +; CHECK: declare i32 @declared_func(i32, i64) + + +define void @self_reference_phi(i8* %ptr) { +entry: + br label %loop +loop: + %x = phi i8* [ %x, %loop ], [ %ptr, %entry ] + br label %loop +} +; CHECK: define void @self_reference_phi(i32 %ptr) { +; CHECK: %x = phi i32 [ %x, %loop ], [ %ptr, %entry ] + +; Self-referencing bitcasts are possible in unreachable 
basic blocks. +; It is not very likely that we will encounter this, but we handle it +; for completeness. +define void @self_reference_bitcast(i8** %dest) { + ret void +unreachable_loop: + store i8* %self_ref, i8** %dest + %self_ref = bitcast i8* %self_ref to i8* + store i8* %self_ref, i8** %dest + br label %unreachable_loop +} +; CHECK: define void @self_reference_bitcast(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr +; CHECK: store i32 undef, i32* %dest.asptr + +define void @circular_reference_bitcasts(i8** %dest) { + ret void +unreachable_loop: + store i8* %cycle1, i8** %dest + %cycle1 = bitcast i8* %cycle2 to i8* + %cycle2 = bitcast i8* %cycle1 to i8* + br label %unreachable_loop +} +; CHECK: define void @circular_reference_bitcasts(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr + +define void @circular_reference_inttoptr(i8** %dest) { + ret void +unreachable_loop: + %ptr = inttoptr i32 %int to i8* + %int = ptrtoint i8* %ptr to i32 + store i8* %ptr, i8** %dest + br label %unreachable_loop +} +; CHECK: define void @circular_reference_inttoptr(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr + +define i8* @forwards_reference(%struct** %ptr) { + br label %block1 +block2: + ; Forwards reference to %val. + %cast = bitcast %struct* %val to i8* + br label %block3 +block1: + %val = load %struct** %ptr + br label %block2 +block3: + ; Backwards reference to a forwards reference that has already been + ; resolved. + ret i8* %cast +} +; CHECK: define i32 @forwards_reference(i32 %ptr) { +; CHECK-NEXT: br label %block1 +; CHECK: block2: +; CHECK-NEXT: br label %block3 +; CHECK: block1: +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %val = load i32* %ptr.asptr +; CHECK-NEXT: br label %block2 +; CHECK: block3: +; CHECK-NEXT: ret i32 %val + + +define i8* @phi_multiple_entry(i1 %arg, i8* %ptr) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i8* [ %ptr, %entry ], [ %ptr, %entry ] + ret i8* %result +} +; CHECK: define i32 @phi_multiple_entry(i1 %arg, i32 %ptr) { +; CHECK: %result = phi i32 [ %ptr, %entry ], [ %ptr, %entry ] + + +define i8* @select(i1 %cond, i8* %val1, i8* %val2) { + %r = select i1 %cond, i8* %val1, i8* %val2 + ret i8* %r +} +; CHECK: define i32 @select(i1 %cond, i32 %val1, i32 %val2) { +; CHECK-NEXT: %r = select i1 %cond, i32 %val1, i32 %val2 + + +define i32* @ptrtoint_same_size(i32* %ptr) { + %a = ptrtoint i32* %ptr to i32 + %b = add i32 %a, 4 + %c = inttoptr i32 %b to i32* + ret i32* %c +} +; CHECK: define i32 @ptrtoint_same_size(i32 %ptr) { +; CHECK-NEXT: %b = add i32 %ptr, 4 +; CHECK-NEXT: ret i32 %b + + +define i32* @ptrtoint_different_size(i32* %ptr) { + %a = ptrtoint i32* %ptr to i64 + %b = add i64 %a, 4 + %c = inttoptr i64 %b to i32* + ret i32* %c +} +; CHECK: define i32 @ptrtoint_different_size(i32 %ptr) { +; CHECK-NEXT: %a = zext i32 %ptr to i64 +; CHECK-NEXT: %b = add i64 %a, 4 +; CHECK-NEXT: %c = trunc i64 %b to i32 +; CHECK-NEXT: ret i32 %c + +define i8 @ptrtoint_truncates_var(i32* %ptr) { + %a = ptrtoint i32* %ptr to i8 + ret i8 %a +} +; CHECK: define i8 @ptrtoint_truncates_var(i32 %ptr) { +; CHECK-NEXT: %a = trunc i32 %ptr to i8 + +define i8 @ptrtoint_truncates_global() { + %a = ptrtoint i32* @var to i8 + ret i8 %a +} +; CHECK: define i8 @ptrtoint_truncates_global() { +; CHECK-NEXT: %expanded = ptrtoint i32* @var to i32 +; CHECK-NEXT: %a = trunc i32 %expanded to i8 + + +define i32* @pointer_bitcast(i64* %ptr) { + %cast = bitcast i64* %ptr to i32* + ret i32* %cast +} +; CHECK: define i32 
@pointer_bitcast(i32 %ptr) { +; CHECK-NEXT: ret i32 %ptr + +; Same-type non-pointer bitcasts happen to be left alone by this pass. +define i32 @no_op_bitcast(i32 %val) { + %val2 = bitcast i32 %val to i32 + ret i32 %val2 +} +; CHECK: define i32 @no_op_bitcast(i32 %val) { +; CHECK-NEXT: %val2 = bitcast i32 %val to i32 + +define i64 @kept_bitcast(double %d) { + %i = bitcast double %d to i64 + ret i64 %i +} +; CHECK: define i64 @kept_bitcast(double %d) { +; CHECK-NEXT: %i = bitcast double %d to i64 + + +define i32 @constant_pointer_null() { + %val = ptrtoint i32* null to i32 + ret i32 %val +} +; CHECK: define i32 @constant_pointer_null() { +; CHECK-NEXT: ret i32 0 + +define i32 @constant_pointer_undef() { + %val = ptrtoint i32* undef to i32 + ret i32 %val +} +; CHECK: define i32 @constant_pointer_undef() { +; CHECK-NEXT: ret i32 undef + +define i16* @constant_pointer_null_load() { + %val = load i16** null + ret i16* %val +} +; CHECK: define i32 @constant_pointer_null_load() { +; CHECK-NEXT: %.asptr = inttoptr i32 0 to i32* +; CHECK-NEXT: %val = load i32* %.asptr + +define i16* @constant_pointer_undef_load() { + %val = load i16** undef + ret i16* %val +} +; CHECK: define i32 @constant_pointer_undef_load() { +; CHECK-NEXT: %.asptr = inttoptr i32 undef to i32* +; CHECK-NEXT: %val = load i32* %.asptr + + +define i8 @load(i8* %ptr) { + %x = load i8* %ptr + ret i8 %x +} +; CHECK: define i8 @load(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %x = load i8* %ptr.asptr + +define void @store(i8* %ptr, i8 %val) { + store i8 %val, i8* %ptr + ret void +} +; CHECK: define void @store(i32 %ptr, i8 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: store i8 %val, i8* %ptr.asptr + + +define i8* @load_ptr(i8** %ptr) { + %x = load i8** %ptr + ret i8* %x +} +; CHECK: define i32 @load_ptr(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %x = load i32* %ptr.asptr + +define void @store_ptr(i8** %ptr, i8* %val) { + store i8* %val, i8** %ptr + ret void +} +; CHECK: define void @store_ptr(i32 %ptr, i32 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: store i32 %val, i32* %ptr.asptr + + +define i8 @load_attrs(i8* %ptr) { + %x = load atomic volatile i8* %ptr seq_cst, align 128 + ret i8 %x +} +; CHECK: define i8 @load_attrs(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %x = load atomic volatile i8* %ptr.asptr seq_cst, align 128 + +define void @store_attrs(i8* %ptr, i8 %val) { + store atomic volatile i8 %val, i8* %ptr singlethread release, align 256 + ret void +} +; CHECK: define void @store_attrs(i32 %ptr, i8 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: store atomic volatile i8 %val, i8* %ptr.asptr singlethread release, align 256 + + +define i32 @cmpxchg(i32* %ptr, i32 %a, i32 %b) { + %r = cmpxchg i32* %ptr, i32 %a, i32 %b seq_cst + ret i32 %r +} +; CHECK: define i32 @cmpxchg(i32 %ptr, i32 %a, i32 %b) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %r = cmpxchg i32* %ptr.asptr, i32 %a, i32 %b seq_cst + +define i32 @atomicrmw(i32* %ptr, i32 %x) { + %r = atomicrmw add i32* %ptr, i32 %x seq_cst + ret i32 %r +} +; CHECK: define i32 @atomicrmw(i32 %ptr, i32 %x) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %r = atomicrmw add i32* %ptr.asptr, i32 %x seq_cst + + +define i8* @indirect_call(i8* (i8*)* %func, i8* %arg) { + %result = call i8* %func(i8* %arg) + ret i8* %result +} +; CHECK: define i32 
@indirect_call(i32 %func, i32 %arg) {
+; CHECK-NEXT: %func.asptr = inttoptr i32 %func to i32 (i32)*
+; CHECK-NEXT: %result = call i32 %func.asptr(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+
+; Test forwards reference
+define i8* @direct_call1(i8* %arg) {
+  %result = call i8* @direct_call2(i8* %arg)
+  ret i8* %result
+}
+; CHECK: define i32 @direct_call1(i32 %arg) {
+; CHECK-NEXT: %result = call i32 @direct_call2(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+; Test backwards reference
+define i8* @direct_call2(i8* %arg) {
+  %result = call i8* @direct_call1(i8* %arg)
+  ret i8* %result
+}
+; CHECK: define i32 @direct_call2(i32 %arg) {
+; CHECK-NEXT: %result = call i32 @direct_call1(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+
+@var = global i32 0
+
+define i32* @get_addr_of_global() {
+  ret i32* @var
+}
+; CHECK: define i32 @get_addr_of_global() {
+; CHECK-NEXT: %expanded = ptrtoint i32* @var to i32
+; CHECK-NEXT: ret i32 %expanded
+
+define %struct* (%struct*)* @get_addr_of_func() {
+  ret %struct* (%struct*)* @addr_taken_func
+}
+; CHECK: define i32 @get_addr_of_func() {
+; CHECK-NEXT: %expanded = ptrtoint i32 (i32)* @addr_taken_func to i32
+; CHECK-NEXT: ret i32 %expanded
+
+
+define i32 @load_global() {
+  %val = load i32* @var
+  ret i32 %val
+}
+; CHECK: define i32 @load_global() {
+; CHECK-NEXT: %val = load i32* @var
+; CHECK-NEXT: ret i32 %val
+
+define i16 @load_global_bitcast() {
+  %ptr = bitcast i32* @var to i16*
+  %val = load i16* %ptr
+  ret i16 %val
+}
+; CHECK: define i16 @load_global_bitcast() {
+; CHECK-NEXT: %var.bc = bitcast i32* @var to i16*
+; CHECK-NEXT: %val = load i16* %var.bc
+; CHECK-NEXT: ret i16 %val
+
+
+declare void @receive_alloca(%struct* %ptr)
+
+define void @alloca_fixed() {
+  %buf = alloca %struct, align 128
+  call void @receive_alloca(%struct* %buf)
+  ret void
+}
+; CHECK: define void @alloca_fixed() {
+; CHECK-NEXT: %buf = alloca i8, i32 8, align 128
+; CHECK-NEXT: %buf.asint = ptrtoint i8* %buf to i32
+; CHECK-NEXT: call void @receive_alloca(i32 %buf.asint)
+
+; When the size passed to alloca is a constant, it should be a
+; constant in the output too.
+define void @alloca_fixed_array() { + %buf = alloca %struct, i32 100 + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK: define void @alloca_fixed_array() { +; CHECK-NEXT: %buf = alloca i8, i32 800, align 8 +; CHECK-NEXT: %buf.asint = ptrtoint i8* %buf to i32 +; CHECK-NEXT: call void @receive_alloca(i32 %buf.asint) + +define void @alloca_variable(i32 %size) { + %buf = alloca %struct, i32 %size + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK: define void @alloca_variable(i32 %size) { +; CHECK-NEXT: %buf.alloca_mul = mul i32 8, %size +; CHECK-NEXT: %buf = alloca i8, i32 %buf.alloca_mul +; CHECK-NEXT: %buf.asint = ptrtoint i8* %buf to i32 +; CHECK-NEXT: call void @receive_alloca(i32 %buf.asint) + +define void @alloca_alignment_i32() { + %buf = alloca i32 + ret void +} +; CHECK: void @alloca_alignment_i32() { +; CHECK-NEXT: alloca i8, i32 4, align 4 + +define void @alloca_alignment_double() { + %buf = alloca double + ret void +} +; CHECK: void @alloca_alignment_double() { +; CHECK-NEXT: alloca i8, i32 8, align 8 + +define void @alloca_lower_alignment() { + %buf = alloca i32, align 1 + ret void +} +; CHECK: void @alloca_lower_alignment() { +; CHECK-NEXT: alloca i8, i32 4, align 1 + + +; This tests for a bug in which, when processing the store's %buf2 +; operand, ReplacePtrsWithInts accidentally strips off the ptrtoint +; cast that it previously introduced for the 'alloca', causing an +; internal sanity check to fail. +define void @alloca_cast_stripping() { + %buf = alloca i32 + %buf1 = ptrtoint i32* %buf to i32 + %buf2 = inttoptr i32 %buf1 to i32* + store i32 0, i32* %buf2 + ret void +} +; CHECK: define void @alloca_cast_stripping() { +; CHECK-NEXT: %buf = alloca i8, i32 4 +; CHECK-NEXT: %buf.bc = bitcast i8* %buf to i32* +; CHECK-NEXT: store i32 0, i32* %buf.bc + + +define i1 @compare(i8* %ptr1, i8* %ptr2) { + %cmp = icmp ult i8* %ptr1, %ptr2 + ret i1 %cmp +} +; CHECK: define i1 @compare(i32 %ptr1, i32 %ptr2) { +; CHECK-NEXT: %cmp = icmp ult i32 %ptr1, %ptr2 + + +declare i8* @llvm.some.intrinsic(i8* %ptr) + +define i8* @preserve_intrinsic_type(i8* %ptr) { + %result = call i8* @llvm.some.intrinsic(i8* %ptr) + ret i8* %result +} +; CHECK: define i32 @preserve_intrinsic_type(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %result = call i8* @llvm.some.intrinsic(i8* %ptr.asptr) +; CHECK-NEXT: %result.asint = ptrtoint i8* %result to i32 +; CHECK-NEXT: ret i32 %result.asint + + +; Just check that the pass does not crash on inline asm. +define i16* @inline_asm1(i8* %ptr) { + %val = call i16* asm "foo", "=r,r"(i8* %ptr) + ret i16* %val +} + +define i16** @inline_asm2(i8** %ptr) { + %val = call i16** asm "foo", "=r,r"(i8** %ptr) + ret i16** %val +} + + +declare void @llvm.dbg.declare(metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata) + +define void @debug_declare(i32 %val) { + ; We normally expect llvm.dbg.declare to be used on an alloca. + %var = alloca i32 + tail call void @llvm.dbg.declare(metadata !{i32* %var}, metadata !{}) + tail call void @llvm.dbg.declare(metadata !{i32 %val}, metadata !{}) + ret void +} +; CHECK: define void @debug_declare(i32 %val) { +; CHECK-NEXT: %var = alloca i8, i32 4 +; CHECK-NEXT: call void @llvm.dbg.declare(metadata !{i8* %var}, metadata !0) +; This case is currently not converted. +; CHECK-NEXT: call void @llvm.dbg.declare(metadata !{null}, metadata !0) +; CHECK-NEXT: ret void + +; For now, debugging info for values is lost. 
replaceAllUsesWith() +; does not work for metadata references -- it converts them to nulls. +; This makes dbg.value too tricky to handle for now. +define void @debug_value(i32 %val, i8* %ptr) { + tail call void @llvm.dbg.value(metadata !{i32 %val}, i64 1, metadata !{}) + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 2, metadata !{}) + ret void +} +; CHECK: define void @debug_value(i32 %val, i32 %ptr) { +; CHECK-NEXT: call void @llvm.dbg.value(metadata !{null}, i64 1, metadata !0) +; CHECK-NEXT: call void @llvm.dbg.value(metadata !{null}, i64 2, metadata !0) +; CHECK-NEXT: ret void + + +declare void @llvm.lifetime.start(i64 %size, i8* %ptr) +declare void @llvm.invariant.start(i64 %size, i8* %ptr) +declare void @llvm.invariant.end(i64 %size, i8* %ptr) + +; GVN can introduce the following horrible corner case of a lifetime +; marker referencing a PHI node. But we convert the phi to i32 type, +; and lifetime.start doesn't work on an inttoptr converting an i32 phi +; to a pointer. Because of this, we just strip out all lifetime +; markers. + +define void @alloca_lifetime_via_phi() { +entry: + %buf = alloca i8 + br label %block +block: + %phi = phi i8* [ %buf, %entry ] + call void @llvm.lifetime.start(i64 -1, i8* %phi) + ret void +} +; CHECK: define void @alloca_lifetime_via_phi() { +; CHECK: %phi = phi i32 [ %buf.asint, %entry ] +; CHECK-NEXT: ret void + +define void @alloca_lifetime() { + %buf = alloca i8 + call void @llvm.lifetime.start(i64 -1, i8* %buf) + ret void +} +; CHECK: define void @alloca_lifetime() { +; CHECK-NEXT: %buf = alloca i8 +; CHECK-NEXT: ret void + +define void @alloca_lifetime_via_bitcast() { + %buf = alloca i32 + %buf_cast = bitcast i32* %buf to i8* + call void @llvm.lifetime.start(i64 -1, i8* %buf_cast) + ret void +} +; CHECK: define void @alloca_lifetime_via_bitcast() { +; CHECK-NEXT: %buf = alloca i8, i32 4 +; CHECK-NEXT: ret void + +define void @strip_invariant_markers() { + %buf = alloca i8 + call void @llvm.invariant.start(i64 1, i8* %buf) + call void @llvm.invariant.end(i64 1, i8* %buf) + ret void +} +; CHECK: define void @strip_invariant_markers() { +; CHECK-NEXT: %buf = alloca i8 +; CHECK-NEXT: ret void + + +; "nocapture" and "noalias" only apply to pointers, so must be stripped. +define void @nocapture_attr(i8* nocapture noalias %ptr) { + ret void +} +; CHECK: define void @nocapture_attr(i32 %ptr) { + +; "nounwind" should be preserved. +define void @nounwind_func_attr() nounwind { + ret void +} +; CHECK: define void @nounwind_func_attr() [[NOUNWIND:#[0-9]+]] { + +define void @nounwind_call_attr() { + call void @nounwind_func_attr() nounwind + ret void +} +; CHECK: define void @nounwind_call_attr() { +; CHECK: call void @nounwind_func_attr() {{.*}}[[NOUNWIND]] + +define fastcc void @fastcc_func() { + ret void +} +; CHECK: define fastcc void @fastcc_func() { + +define void @fastcc_call() { + call fastcc void @fastcc_func() + ret void +} +; CHECK: define void @fastcc_call() { +; CHECK-NEXT: call fastcc void @fastcc_func() + + +; Just check that the pass does not crash on getelementptr. (The pass +; should not depend unnecessarily on ExpandGetElementPtr having been +; run.) +define i8* @getelementptr(i8* %ptr) { + %gep = getelementptr i8* %ptr, i32 10 + ret i8* %gep +} + +; Just check that the pass does not crash on va_arg. 
+define i32* @va_arg(i8* %valist) { + %r = va_arg i8* %valist, i32* + ret i32* %r +} + + +define void @indirectbr(i8* %addr) { + indirectbr i8* %addr, [ label %l1, label %l2 ] +l1: + ret void +l2: + ret void +} +; CHECK: define void @indirectbr(i32 %addr) { +; CHECK-NEXT: %addr.asptr = inttoptr i32 %addr to i8* +; CHECK-NEXT: indirectbr i8* %addr.asptr, [label %l1, label %l2] + + +define i8* @invoke(i8* %val) { + %result = invoke i8* @direct_call1(i8* %val) + to label %cont unwind label %lpad +cont: + ret i8* %result +lpad: + %lp = landingpad { i8*, i32 } personality void (i8*)* @personality_func cleanup + %p = extractvalue { i8*, i32 } %lp, 0 + %s = insertvalue { i8*, i32 } %lp, i8* %val, 0 + ret i8* %p +} +; CHECK: define i32 @invoke(i32 %val) { +; CHECK-NEXT: %result = invoke i32 @direct_call1(i32 %val) +; CHECK-NEXT: to label %cont unwind label %lpad +; CHECK: %lp = landingpad { i8*, i32 } personality void (i8*)* bitcast (void (i32)* @personality_func to void (i8*)*) +; CHECK: %p = extractvalue { i8*, i32 } %lp, 0 +; CHECK-NEXT: %p.asint = ptrtoint i8* %p to i32 +; CHECK-NEXT: %val.asptr = inttoptr i32 %val to i8* +; CHECK-NEXT: %s = insertvalue { i8*, i32 } %lp, i8* %val.asptr, 0 +; CHECK-NEXT: ret i32 %p.asint + +define void @personality_func(i8* %arg) { + ret void +} + + +declare i32 @llvm.eh.typeid.for(i8*) + +@typeid = global i32 0 + +; The argument here must be left as a bitcast, otherwise the backend +; rejects it. +define void @typeid_for() { + %bc = bitcast i32* @typeid to i8* + call i32 @llvm.eh.typeid.for(i8* %bc) + ret void +} +; CHECK: define void @typeid_for() { +; CHECK-NEXT: %typeid.bc = bitcast i32* @typeid to i8* +; CHECK-NEXT: call i32 @llvm.eh.typeid.for(i8* %typeid.bc) + + +; CHECK: attributes {{.*}}[[NOUNWIND]] = { nounwind } diff --git a/test/Transforms/NaCl/resolve-aliases.ll b/test/Transforms/NaCl/resolve-aliases.ll new file mode 100644 index 0000000000..82ad54d74e --- /dev/null +++ b/test/Transforms/NaCl/resolve-aliases.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -resolve-aliases -S | FileCheck %s + +; CHECK-NOT: @alias + +@r1 = internal global i32 zeroinitializer +@a1 = alias i32* @r1 +define i32* @usea1() { +; CHECK: ret i32* @r1 + ret i32* @a1 +} + +@funcalias = alias i32* ()* @usea1 +; CHECK: @usefuncalias +define void @usefuncalias() { +; CHECK: call i32* @usea1 + %1 = call i32* @funcalias() + ret void +} + +@bc1 = global i8* bitcast (i32* @r1 to i8*) +@bcalias = alias i8* bitcast (i32* @r1 to i8*) + +; CHECK: @usebcalias +define i8* @usebcalias() { +; CHECK: ret i8* bitcast (i32* @r1 to i8*) + ret i8* @bcalias +} + + +@fa2 = alias i32* ()* @funcalias +; CHECK: @usefa2 +define void @usefa2() { +; CHECK: call i32* @usea1 + call i32* @fa2() + ret void +} diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll new file mode 100644 index 0000000000..3aa263fa9a --- /dev/null +++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -resolve-pnacl-intrinsics -S | FileCheck %s + +declare i32 @llvm.nacl.setjmp(i8*) +declare void @llvm.nacl.longjmp(i8*, i32) + +; These declarations must be here because the function pass expects +; to find them. In real life they're inserted by the translator +; before the function pass runs. 
+declare i32 @setjmp(i8*) +declare void @longjmp(i8*, i32) + +; CHECK-NOT: call i32 @llvm.nacl.setjmp +; CHECK-NOT: call void @llvm.nacl.longjmp + +define i32 @call_setjmp(i8* %arg) { + %val = call i32 @llvm.nacl.setjmp(i8* %arg) +; CHECK: %val = call i32 @setjmp(i8* %arg) + ret i32 %val +} + +define void @call_longjmp(i8* %arg, i32 %num) { + call void @llvm.nacl.longjmp(i8* %arg, i32 %num) +; CHECK: call void @longjmp(i8* %arg, i32 %num) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-flt-rounds.ll b/test/Transforms/NaCl/rewrite-flt-rounds.ll new file mode 100644 index 0000000000..3c368b8bc3 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-flt-rounds.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the RewriteLLVMIntrinsics pass + +declare i32 @llvm.flt.rounds() + +; No declaration or definition of llvm.flt.rounds() should remain. +; CLEANED-NOT: @llvm.flt.rounds + +define i32 @call_flt_rounds() { +; CHECK: call_flt_rounds +; CHECK-NEXT: ret i32 1 + %val = call i32 @llvm.flt.rounds() + ret i32 %val +} + +; A more complex example with a number of calls in several BBs. +define i32 @multiple_calls(i64* %arg, i32 %num) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock + %v1 = call i32 @llvm.flt.rounds() + br label %block1 +block1: +; CHECK: block1: +; CHECK-NEXT: %v3 = add i32 1, 1 + %v2 = call i32 @llvm.flt.rounds() + %v3 = add i32 %v2, %v1 + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: %v4 = add i32 1, %v3 +; CHECK-NEXT: %v6 = add i32 1, %v4 + %v4 = add i32 %v2, %v3 + %v5 = call i32 @llvm.flt.rounds() + %v6 = add i32 %v5, %v4 + ret i32 %v6 +} diff --git a/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll b/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll new file mode 100644 index 0000000000..3ab64d9dd2 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check how the pass behaves in the presence of library functions with wrong +; signatures. + +declare i8 @longjmp(i64) + +@flongjmp = global i8 (i64)* @longjmp +; CHECK: @flongjmp = global i8 (i64)* bitcast (void (i64*, i32)* @longjmp to i8 (i64)*) + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) + +declare i8* @memcpy(i32) + +define i8* @call_bad_memcpy(i32 %arg) { + %result = call i8* @memcpy(i32 %arg) + ret i8* %result +} + +; CHECK: define i8* @call_bad_memcpy(i32 %arg) { +; CHECK: %result = call i8* bitcast (i8* (i8*, i8*, i32)* @memcpy to i8* (i32)*)(i32 %arg) + +declare i8 @setjmp() + +; This simulates a case where the original C file had a correct setjmp +; call but due to linking order a wrong declaration made it into the +; IR. In this case, the correct call is bitcasted to the correct type. +; The pass should treat this properly by creating a direct intrinsic +; call instead of going through the wrapper. 
+define i32 @call_valid_setjmp(i64* %buf) { + %result = call i32 bitcast (i8 ()* @setjmp to i32 (i64*)*)(i64* %buf) + ret i32 %result +} + +; CHECK: define i32 @call_valid_setjmp(i64* %buf) { +; CHECK-NEXT: %jmp_buf_i8 = bitcast i64* %buf to i8* +; CHECK-NEXT: %result = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8) +; CHECK-NEXT: ret i32 %result +; CHECK-NEXT: } diff --git a/test/Transforms/NaCl/rewrite-longjmp-no-store.ll b/test/Transforms/NaCl/rewrite-longjmp-no-store.ll new file mode 100644 index 0000000000..134593ad39 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-longjmp-no-store.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED +; Test that when there are no uses other than calls to longjmp, +; no function body is generated. + +declare void @longjmp(i64*, i32) + +; No declaration or definition of longjmp() should remain. +; CLEANED-NOT: @longjmp + +define void @call_longjmp(i64* %arg, i32 %num) { + call void @longjmp(i64* %arg, i32 %num) +; CHECK: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %num) + ret void +} + diff --git a/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll b/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll new file mode 100644 index 0000000000..ed7818ec96 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check that the rewrite pass behaves correctly in the presence +; of various uses of longjmp that are not calls. + +@fp = global void (i64*, i32)* @longjmp, align 8 +; CHECK: @fp = global void (i64*, i32)* @longjmp, align 8 +@arrfp = global [3 x void (i64*, i32)*] [void (i64*, i32)* null, void (i64*, i32)* @longjmp, void (i64*, i32)* null], align 16 +; CHECK: @arrfp = global [3 x void (i64*, i32)*] [void (i64*, i32)* null, void (i64*, i32)* @longjmp, void (i64*, i32)* null], align 16 + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { + +declare void @longjmp(i64*, i32) + +declare void @somefunc(i8*) + +define void @foo() { +entry: + call void @somefunc(i8* bitcast (void (i64*, i32)* @longjmp to i8*)) +; CHECK: call void @somefunc(i8* bitcast (void (i64*, i32)* @longjmp to i8*)) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll b/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll new file mode 100644 index 0000000000..4d91774c34 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED + +declare i8* @memcpy(i8*, i8*, i32) +declare i8* @memmove(i8*, i8*, i32) +declare i8* @memset(i8*, i32, i32) + +; No declaration or definition of the library functions should remain, since +; the only uses of mem* functions are calls. 
+; CLEANED-NOT: @memcpy +; CLEANED-NOT: @memmove +; CLEANED-NOT: @memset + +define i8* @call_memcpy(i8* %dest, i8* %src, i32 %len) { + %result = call i8* @memcpy(i8* %dest, i8* %src, i32 %len) + ret i8* %result +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +define i8* @call_memmove(i8* %dest, i8* %src, i32 %len) { + %result = call i8* @memmove(i8* %dest, i8* %src, i32 %len) + ret i8* %result +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +define i8* @call_memset(i8* %dest, i32 %c, i32 %len) { + %result = call i8* @memset(i8* %dest, i32 %c, i32 %len) + ret i8* %result +; CHECK: %trunc_byte = trunc i32 %c to i8 +; CHECK: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +; CHECK: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) + +; CHECK: declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) diff --git a/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll b/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll new file mode 100644 index 0000000000..5c6bdfdcb5 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check that the rewrite pass behaves correctly in the presence +; of various uses of mem* that are not calls. + +@fpcpy = global i8* (i8*, i8*, i32)* @memcpy +; CHECK: @fpcpy = global i8* (i8*, i8*, i32)* @memcpy +@fpmove = global i8* (i8*, i8*, i32)* @memmove +; CHECK: @fpmove = global i8* (i8*, i8*, i32)* @memmove +@fpset = global i8* (i8*, i32, i32)* @memset +; CHECK: @fpset = global i8* (i8*, i32, i32)* @memset + +; CHECK: define internal i8* @memcpy(i8* %dest, i8* %src, i32 %len) { +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +; CHECK: define internal i8* @memmove(i8* %dest, i8* %src, i32 %len) { +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +; CHECK: define internal i8* @memset(i8* %dest, i32 %val, i32 %len) { +; CHECK: %trunc_byte = trunc i32 %val to i8 +; CHECK: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +declare i8* @memcpy(i8*, i8*, i32) +declare i8* @memmove(i8*, i8*, i32) +declare i8* @memset(i8*, i32, i32) diff --git a/test/Transforms/NaCl/rewrite-setjmp-store-error.ll b/test/Transforms/NaCl/rewrite-setjmp-store-error.ll new file mode 100644 index 0000000000..1c3dd0d4cb --- /dev/null +++ b/test/Transforms/NaCl/rewrite-setjmp-store-error.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S 2>&1 | FileCheck %s +; Test that the pass enforces not being able to store the address +; of setjmp. 
+ +declare i32 @setjmp(i64*) + +define i32 @takeaddr_setjmp(i64* %arg) { + %fp = alloca i32 (i64*)*, align 8 +; CHECK: Taking the address of setjmp is invalid + store i32 (i64*)* @setjmp, i32 (i64*)** %fp, align 8 + ret i32 7 +} + diff --git a/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll b/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll new file mode 100644 index 0000000000..f34f004d7f --- /dev/null +++ b/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the RewritePNaClLibraryCalls pass + +declare i32 @setjmp(i64*) +declare void @longjmp(i64*, i32) + +; No declaration or definition of setjmp() should remain. +; CLEANED-NOT: @setjmp + +; Since the address of longjmp is being taken here, a body is generated +; for it, which does a cast and calls an intrinsic + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { +; CHECK: entry: +; CHECK: %jmp_buf_i8 = bitcast i64* %env to i8* +; CHECK: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %val) +; CHECK: unreachable +; CHECK: } + +define i32 @call_setjmp(i64* %arg) { +; CHECK-NOT: call i32 @setjmp +; CHECK: %jmp_buf_i8 = bitcast i64* %arg to i8* +; CHECK-NEXT: %val = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8) + %val = call i32 @setjmp(i64* %arg) + ret i32 %val +} + +define void @call_longjmp(i64* %arg, i32 %num) { +; CHECK-NOT: call void @longjmp +; CHECK: %jmp_buf_i8 = bitcast i64* %arg to i8* +; CHECK-NEXT: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %num) + call void @longjmp(i64* %arg, i32 %num) + ret void +} + +define i32 @takeaddr_longjmp(i64* %arg, i32 %num) { + %fp = alloca void (i64*, i32)*, align 8 +; CHECK: store void (i64*, i32)* @longjmp, void (i64*, i32)** %fp + store void (i64*, i32)* @longjmp, void (i64*, i32)** %fp, align 8 + ret i32 7 +} + +; A more complex example with a number of calls in several BBs +define void @multiple_calls(i64* %arg, i32 %num) { +entryblock: +; CHECK: entryblock +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num) + br label %block1 +block1: +; CHECK: block1 +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num) +; CHECK: call i32 @llvm.nacl.setjmp + %val = call i32 @setjmp(i64* %arg) + %num2 = add i32 %val, %num +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num2) + br label %exitblock +exitblock: + %num3 = add i32 %num, %num + call void @longjmp(i64* %arg, i32 %num3) + %num4 = add i32 %num, %num3 +; CHECK: bitcast +; CHECK-NEXT: call void @llvm.nacl.longjmp + call void @longjmp(i64* %arg, i32 %num4) + ret void +} + +; CHECK: declare i32 @llvm.nacl.setjmp(i8*) +; CHECK: declare void @llvm.nacl.longjmp(i8*, i32) + diff --git a/test/Transforms/NaCl/strip-attributes.ll b/test/Transforms/NaCl/strip-attributes.ll new file mode 100644 index 0000000000..66224a8977 --- /dev/null +++ b/test/Transforms/NaCl/strip-attributes.ll @@ -0,0 +1,120 @@ +; RUN: opt -S -nacl-strip-attributes %s | FileCheck %s + + +@var = unnamed_addr global i32 0 +; CHECK: @var = global i32 0 + + +define fastcc void @func_attrs(i32 inreg, i32 zeroext) + unnamed_addr noreturn nounwind readonly align 8 { + ret void +} +; CHECK: define void @func_attrs(i32, i32) { + +define hidden void @hidden_visibility() { + ret void +} +; CHECK: define void @hidden_visibility() { + +define protected void 
@protected_visibility() { + ret void +} +; CHECK: define void @protected_visibility() { + + +define void @call_attrs() { + call fastcc void @func_attrs(i32 inreg 10, i32 zeroext 20) noreturn nounwind readonly + ret void +} +; CHECK: define void @call_attrs() +; CHECK: call void @func_attrs(i32 10, i32 20){{$}} + + +; We currently don't attempt to strip attributes from intrinsic +; declarations because the reader automatically inserts attributes +; based on built-in knowledge of intrinsics, so it is difficult to get +; rid of them here. +declare i8* @llvm.nacl.read.tp() +; CHECK: declare i8* @llvm.nacl.read.tp() #{{[0-9]+}} + +define void @arithmetic_attrs() { + %add = add nsw i32 1, 2 + %shl = shl nuw i32 3, 4 + %lshr = lshr exact i32 2, 1 + ret void +} +; CHECK: define void @arithmetic_attrs() { +; CHECK-NEXT: %add = add i32 1, 2 +; CHECK-NEXT: %shl = shl i32 3, 4 +; CHECK-NEXT: %lshr = lshr i32 2, 1 + + +; Implicit default alignments are changed to explicit alignments. +define void @default_alignment_attrs(float %f, double %d) { + load i8* null + load i32* null + load float* null + load double* null + + store i8 100, i8* null + store i32 100, i32* null + store float %f, float* null + store double %d, double* null + ret void +} +; CHECK: define void @default_alignment_attrs +; CHECK-NEXT: load i8* null, align 1 +; CHECK-NEXT: load i32* null, align 1 +; CHECK-NEXT: load float* null, align 4 +; CHECK-NEXT: load double* null, align 8 +; CHECK-NEXT: store i8 100, i8* null, align 1 +; CHECK-NEXT: store i32 100, i32* null, align 1 +; CHECK-NEXT: store float %f, float* null, align 4 +; CHECK-NEXT: store double %d, double* null, align 8 + +define void @reduce_alignment_assumptions() { + load i32* null, align 4 + load float* null, align 2 + load float* null, align 4 + load float* null, align 8 + load double* null, align 2 + load double* null, align 8 + load double* null, align 16 + + ; Higher alignment assumptions must be retained for atomics. 
+ load atomic i32* null seq_cst, align 4 + load atomic i32* null seq_cst, align 8 + store atomic i32 100, i32* null seq_cst, align 4 + store atomic i32 100, i32* null seq_cst, align 8 + ret void +} +; CHECK: define void @reduce_alignment_assumptions +; CHECK-NEXT: load i32* null, align 1 +; CHECK-NEXT: load float* null, align 1 +; CHECK-NEXT: load float* null, align 4 +; CHECK-NEXT: load float* null, align 4 +; CHECK-NEXT: load double* null, align 1 +; CHECK-NEXT: load double* null, align 8 +; CHECK-NEXT: load double* null, align 8 +; CHECK-NEXT: load atomic i32* null seq_cst, align 4 +; CHECK-NEXT: load atomic i32* null seq_cst, align 4 +; CHECK-NEXT: store atomic i32 100, i32* null seq_cst, align 4 +; CHECK-NEXT: store atomic i32 100, i32* null seq_cst, align 4 + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) + +define void @reduce_memcpy_alignment_assumptions(i8* %ptr) { + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %ptr, + i32 20, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %ptr, + i32 20, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, + i32 20, i32 4, i1 false) + ret void +} +; CHECK: define void @reduce_memcpy_alignment_assumptions +; CHECK-NEXT: call void @llvm.memcpy.{{.*}} i32 20, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memmove.{{.*}} i32 20, i32 1, i1 false) +; CHECK-NEXT: call void @llvm.memset.{{.*}} i32 20, i32 1, i1 false) diff --git a/test/Transforms/NaCl/strip-branchweight-metadata.ll b/test/Transforms/NaCl/strip-branchweight-metadata.ll new file mode 100644 index 0000000000..61d3a6d5af --- /dev/null +++ b/test/Transforms/NaCl/strip-branchweight-metadata.ll @@ -0,0 +1,29 @@ +; RUN: opt -S -strip-metadata %s | FileCheck %s + +; Test that !prof metadata is removed from branches +; CHECK: @foo +; CHECK-NOT: !prof +define i32 @foo(i32 %c) { + switch i32 %c, label %3 [ + i32 5, label %4 + i32 0, label %1 + i32 4, label %2 + ], !prof !0 + +; <label>:1 ; preds = %0 + br label %4 + +; <label>:2 ; preds = %0 + br label %4 + +; <label>:3 ; preds = %0 + br label %4 + +; <label>:4 ; preds = %0, %3, %2, %1 + %.0 = phi i32 [ -1, %1 ], [ 99, %2 ], [ 1, %3 ], [ 0, %0 ] + ret i32 %.0 +} + +; CHECK: ret i32 %.0 +; CHECK-NOT: !0 = +!0 = metadata !{metadata !"branch_weights", i32 4, i32 256, i32 8, i32 4} diff --git a/test/Transforms/NaCl/strip-meta-leaves-debug.ll b/test/Transforms/NaCl/strip-meta-leaves-debug.ll new file mode 100644 index 0000000000..acb5a81baa --- /dev/null +++ b/test/Transforms/NaCl/strip-meta-leaves-debug.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -strip-metadata %s | FileCheck %s +; RUN: opt -S -strip-metadata -strip-debug %s | FileCheck %s --check-prefix=NODEBUG + +define i32 @foo(i32 %c) { +; CHECK: @foo +; CHECK-NEXT: call void @llvm.dbg{{.*}}, !dbg +; CHECK-NEXT: ret{{.*}}, !dbg +; NODEBUG: @foo +; NODEBUG-NOT: !dbg + tail call void @llvm.dbg.value(metadata !{i32 %c}, i64 0, metadata !9), !dbg !10 + ret i32 %c, !dbg !11 +} + +; CHECK: @llvm.dbg.value +; NODEBUG: ret i32 +; NODEBUG-NOT: @llvm.dbg.value +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +; CHECK-NOT: MadeUpMetadata +!MadeUpMetadata = !{} + +; CHECK: !llvm.dbg.cu +!llvm.dbg.cu = !{!0} + +; CHECK-NOT: llvm.module.flags +!llvm.module.flags = !{ !12} + +; CHECK: !0 = +!0 = metadata !{i32 786449, i32 0, i32 12, metadata !"test.c", metadata !"/tmp", metadata !"clang version 3.3 (trunk 176732) (llvm/trunk 
176733)", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !1, metadata !""} ; [ DW_TAG_compile_unit ] [/tmp/test.c] [DW_LANG_C99] +!1 = metadata !{i32 0} +!2 = metadata !{metadata !3} +!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"foo", metadata !"foo", metadata !"", metadata !4, i32 1, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32)* @foo, null, null, metadata !8, i32 1} ; [ DW_TAG_subprogram ] [line 1] [def] [foo] +!4 = metadata !{i32 786473, metadata !"test.c", metadata !"/tmp", null} ; [ DW_TAG_file_type ] +!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!6 = metadata !{metadata !7, metadata !7} +!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 786689, metadata !3, metadata !"c", metadata !4, i32 16777217, metadata !7, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 1] +!10 = metadata !{i32 1, i32 0, metadata !3, null} +; CHECK: !11 = +!11 = metadata !{i32 2, i32 0, metadata !3, null} +; CHECK-NOT: !12 = +!12 = metadata !{ i32 6, metadata !"Linker Options", + metadata !{ + metadata !{ metadata !"-lz" }, + metadata !{ metadata !"-framework", metadata !"Cocoa" } } } diff --git a/test/Transforms/NaCl/strip-tbaa-metadata.ll b/test/Transforms/NaCl/strip-tbaa-metadata.ll new file mode 100644 index 0000000000..c555af6712 --- /dev/null +++ b/test/Transforms/NaCl/strip-tbaa-metadata.ll @@ -0,0 +1,36 @@ +; RUN: opt -S -strip-metadata %s | FileCheck %s + +; Test that !tbaa is removed from loads/stores. +; CHECK: @foo +; CHECK-NOT: !tbaa +define double @foo(i32* nocapture %ptr1, double* nocapture %ptr2) nounwind readonly { + store i32 99999, i32* %ptr1, align 1, !tbaa !0 + %1 = load double* %ptr2, align 8, !tbaa !3 + ret double %1 +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +; Test that !tbaa is removed from calls. +; CHECK: @bar +; CHECK-NOT: !tbaa +define void @bar(i8* nocapture %p, i8* nocapture %q, + i8* nocapture %s) nounwind { + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, + i64 16, i32 1, i1 false), !tbaa !4 + store i8 2, i8* %s, align 1, !tbaa !5 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, + i64 16, i32 1, i1 false), !tbaa !4 +; CHECK ret void + ret void +} + +; Test that the metadata nodes aren't left over. 
+; CHECK-NOT: !0 = + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{metadata !"double", metadata !1} +!4 = metadata !{metadata !"A", metadata !1} +!5 = metadata !{metadata !"B", metadata !1} diff --git a/test/lit.cfg b/test/lit.cfg index 8272e97c38..8931369172 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -203,7 +203,9 @@ else: pathext = [''] for pattern in [r"\bbugpoint\b(?!-)", r"(?<!/|-)\bclang\b(?!-)", r"\bgold\b", - r"\bllc\b", r"\blli\b", + # LOCALMOD - match pnacl-llc + r"\bpnacl-llc\b", + r"(?<!-)\bllc\b", r"\blli\b", r"\bllvm-ar\b", r"\bllvm-as\b", r"\bllvm-bcanalyzer\b", r"\bllvm-config\b", r"\bllvm-cov\b", r"\bllvm-diff\b", diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 6b7c884516..e32aef3169 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -14,6 +14,7 @@ add_subdirectory(llvm-dis) add_subdirectory(llvm-mc) add_subdirectory(llc) +add_subdirectory(pnacl-llc) add_subdirectory(llvm-ranlib) add_subdirectory(llvm-ar) add_subdirectory(llvm-nm) @@ -42,6 +43,11 @@ add_subdirectory(llvm-stress) add_subdirectory(llvm-mcmarkup) add_subdirectory(llvm-symbolizer) +add_subdirectory(pnacl-abicheck) +add_subdirectory(pnacl-bcanalyzer) +add_subdirectory(pnacl-freeze) +add_subdirectory(pnacl-thaw) +add_subdirectory(bc-wrap) add_subdirectory(obj2yaml) add_subdirectory(yaml2obj) diff --git a/tools/LLVMBuild.txt b/tools/LLVMBuild.txt index 25aa177b35..d7160f774d 100644 --- a/tools/LLVMBuild.txt +++ b/tools/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = bugpoint llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-jitlistener llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup +subdirectories = bugpoint llc pnacl-llc lli llvm-ar llvm-as llvm-bcanalyzer llvm-cov llvm-diff llvm-dis llvm-dwarfdump llvm-extract llvm-jitlistener llvm-link llvm-mc llvm-nm llvm-objdump llvm-prof llvm-ranlib llvm-rtdyld llvm-size macho-dump opt llvm-mcmarkup pnacl-abicheck pnacl-bcanalyzer pnacl-freeze pnacl-thaw [component_0] type = Group diff --git a/tools/Makefile b/tools/Makefile index eaf9ed3577..b94f08f81c 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -28,14 +28,15 @@ OPTIONAL_DIRS := lldb # in parallel builds. Please retain this ordering. DIRS := llvm-config PARALLEL_DIRS := opt llvm-as llvm-dis \ - llc llvm-ranlib llvm-ar llvm-nm \ + llc pnacl-llc llvm-ranlib llvm-ar llvm-nm \ llvm-prof llvm-link \ lli llvm-extract llvm-mc \ bugpoint llvm-bcanalyzer \ llvm-diff macho-dump llvm-objdump llvm-readobj \ llvm-rtdyld llvm-dwarfdump llvm-cov \ - llvm-size llvm-stress llvm-mcmarkup \ - llvm-symbolizer obj2yaml yaml2obj + llvm-size llvm-stress llvm-mcmarkup bc-wrap pso-stub \ + llvm-symbolizer pnacl-abicheck pnacl-bcanalyzer pnacl-freeze \ + pnacl-thaw obj2yaml yaml2obj # If Intel JIT Events support is configured, build an extra tool to test it. ifeq ($(USE_INTEL_JITEVENTS), 1) diff --git a/tools/bc-wrap/CMakeLists.txt b/tools/bc-wrap/CMakeLists.txt new file mode 100644 index 0000000000..7d8ce4fc11 --- /dev/null +++ b/tools/bc-wrap/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS wrap support ) + +add_llvm_tool(bc-wrap + bc_wrap.cpp + )
\ No newline at end of file diff --git a/tools/bc-wrap/LLVMBuild.txt b/tools/bc-wrap/LLVMBuild.txt new file mode 100644 index 0000000000..a91f77625e --- /dev/null +++ b/tools/bc-wrap/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/bc-wrap/LLVMBuild.txt ----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = bc-wrap +parent = Tools +required_libraries = Wrap Support diff --git a/tools/bc-wrap/Makefile b/tools/bc-wrap/Makefile new file mode 100644 index 0000000000..dccff2ecde --- /dev/null +++ b/tools/bc-wrap/Makefile @@ -0,0 +1,20 @@ +#===- tools/bc-wrap/Makefile -----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = bc-wrap + +# Include this here so we can get the configuration of the targets +# that have been configured for construction. We have to do this +# early so we can set up LINK_COMPONENTS before including Makefile.rules +include $(LEVEL)/Makefile.config + +LINK_COMPONENTS := $(TARGETS_TO_BUILD) Wrap + +include $(LLVM_SRC_ROOT)/Makefile.rules diff --git a/tools/bc-wrap/bc_wrap.cpp b/tools/bc-wrap/bc_wrap.cpp new file mode 100644 index 0000000000..5311f714ee --- /dev/null +++ b/tools/bc-wrap/bc_wrap.cpp @@ -0,0 +1,123 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ +/* + * Utility to wrap a .bc file, using LLVM standard+ custom headers. + */ + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Wrap/bitcode_wrapperer.h" +#include "llvm/Wrap/file_wrapper_input.h" +#include "llvm/Wrap/file_wrapper_output.h" + +#include <ctype.h> +#include <string.h> + +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input file>"), cl::Required); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("<output file>")); + +static cl::opt<bool> UnwrapFlag("u", + cl::desc("unwrap rather than wrap the file"), + cl::init(false)); + +static cl::opt<bool> VerboseFlag("v", + cl::desc("print verbose header information"), + cl::init(false)); + +static cl::opt<bool> DryRunFlag("n", + cl::desc("Dry run (implies -v)"), + cl::init(false)); + +// Accept the hash on the command line to avoid having to include sha1 +// library with the LLVM code +static cl::opt<std::string> BitcodeHash("hash", + cl::desc("Hash of bitcode (ignored if -u is given)")); + +const int kMaxBinaryHashLen = 32; + +// Convert ASCII hex hash to binary hash. return buffer and length. +// The caller must free the returned buffer. 
+static uint8_t* ParseBitcodeHash(int* len) { + if (BitcodeHash.size() > kMaxBinaryHashLen * 2 || + BitcodeHash.size() % 2) return NULL; + *len = BitcodeHash.size() / 2; + uint8_t* buf = new uint8_t[*len]; + const char* arg = BitcodeHash.data(); + for (size_t i = 0; i < BitcodeHash.size() / 2; i++) { + unsigned int r; // glibc has %hhx but it's nonstandard + if (!isxdigit(*(arg + 2 * i + 1)) || // sscanf ignores trailing junk + !sscanf(arg + 2 * i, "%2x", &r) || + r > std::numeric_limits<uint8_t>::max()) { + delete [] buf; + return NULL; + } + buf[i] = static_cast<uint8_t>(r); + } + return buf; +} + +int main(const int argc, const char* argv[]) { + bool success = true; + cl::ParseCommandLineOptions(argc, argv, "bitcode wrapper/unwrapper\n"); + if (OutputFilename == "") { + // Default to input file = output file. The cl lib doesn't seem to + // directly support initializing one opt from another. + OutputFilename = InputFilename; + } + if (DryRunFlag) VerboseFlag = true; + sys::fs::file_status outfile_status; + std::string outfile_temp; + outfile_temp = std::string(OutputFilename) + ".temp"; + if (UnwrapFlag) { + FileWrapperInput inbc(InputFilename); + FileWrapperOutput outbc(outfile_temp); + BitcodeWrapperer wrapperer(&inbc, &outbc); + if (wrapperer.IsInputBitcodeWrapper()) { + if (VerboseFlag) { + fprintf(stderr, "Headers read from infile:\n"); + wrapperer.PrintWrapperHeader(); + } + if (DryRunFlag) + return 0; + success = wrapperer.GenerateRawBitcodeFile(); + } + } else { + FileWrapperInput inbc(InputFilename); + FileWrapperOutput outbc(outfile_temp); + BitcodeWrapperer wrapperer(&inbc, &outbc); + if (BitcodeHash.size()) { + // SHA-2 hash is 256 bit + int hash_len; + uint8_t* buf = ParseBitcodeHash(&hash_len); + if (!buf) { + fprintf(stderr, "Bitcode hash must be a hex string <= 64 chars.\n"); + exit(1); + } + BCHeaderField hash(BCHeaderField::kBitcodeHash, hash_len, buf); + wrapperer.AddHeaderField(&hash); + } + if (VerboseFlag) { + fprintf(stderr, "Headers generated:\n"); + wrapperer.PrintWrapperHeader(); + } + if (DryRunFlag) + return 0; + success = wrapperer.GenerateWrappedBitcodeFile(); + } + error_code ec; + if ((ec = sys::fs::rename(outfile_temp, OutputFilename))) { + fprintf(stderr, "Could not rename temporary: %s\n", ec.message().c_str()); + success = false; + } + if (success) return 0; + fprintf(stderr, "error: Unable to generate a proper %s bitcode file!\n", + (UnwrapFlag ? "unwrapped" : "wrapped")); + return 1; +} diff --git a/tools/gold/Makefile b/tools/gold/Makefile index 496e31cc39..31812e1f8c 100644 --- a/tools/gold/Makefile +++ b/tools/gold/Makefile @@ -14,6 +14,10 @@ LINK_LIBS_IN_SHARED := 1 SHARED_LIBRARY := 1 LOADABLE_MODULE := 1 +# @LOCALMOD: this forces to appear -lLTO *after* the object file +# on the linkline. This is necessary for linking on ubuntu precise. 
+# Otherwise LLVMgold.so will not have a dt_needed entry for LTO +EXTRA_LIBS := -lLTO EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/gold.exports # Include this here so we can get the configuration of the targets diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 40f5fd6086..1e96762d10 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -52,6 +52,25 @@ namespace { ld_plugin_set_extra_library_path set_extra_library_path = NULL; ld_plugin_get_view get_view = NULL; ld_plugin_message message = discard_message; + // @LOCALMOD-BEGIN + // REL, DYN, or EXEC + ld_plugin_output_file_type linker_output; + + // Callback for getting link soname from gold + ld_plugin_get_output_soname get_output_soname = NULL; + + // Callback for getting needed libraries from gold + ld_plugin_get_needed get_needed = NULL; + + // Callback for getting number of needed library from gold + ld_plugin_get_num_needed get_num_needed = NULL; + + // Callback for getting the number of --wrap'd symbols. + ld_plugin_get_num_wrapped get_num_wrapped = NULL; + + // Callback for getting the name of a wrapped symbol. + ld_plugin_get_wrapped get_wrapped = NULL; + // @LOCALMOD-END int api_version = 0; int gold_version = 0; @@ -59,11 +78,17 @@ namespace { struct claimed_file { void *handle; std::vector<ld_plugin_symbol> syms; + bool is_linked_in; // @LOCALMOD }; lto_codegen_model output_type = LTO_CODEGEN_PIC_MODEL_STATIC; std::string output_name = ""; std::list<claimed_file> Modules; + + // @LOCALMOD-BEGIN + std::vector<std::string> DepLibs; + // @LOCALMOD-END + std::vector<sys::Path> Cleanup; lto_code_gen_t code_gen = NULL; } @@ -71,6 +96,7 @@ namespace { namespace options { enum generate_bc { BC_NO, BC_ALSO, BC_ONLY }; static bool generate_api_file = false; + static bool gather_then_link = true; // @LOCALMOD static generate_bc generate_bc_file = BC_NO; static std::string bc_path; static std::string obj_path; @@ -100,6 +126,10 @@ namespace options { triple = opt.substr(strlen("mtriple=")); } else if (opt.startswith("obj-path=")) { obj_path = opt.substr(strlen("obj-path=")); + // @LOCALMOD-BEGIN + } else if (opt == "no-gather-then-link") { + gather_then_link = false; + // @LOCALMOD-END } else if (opt == "emit-llvm") { generate_bc_file = BC_ONLY; } else if (opt == "also-emit-llvm") { @@ -120,6 +150,18 @@ namespace options { } } +// @LOCALMOD-BEGIN +static const char *get_basename(const char *path) { + if (path == NULL) + return NULL; + const char *slash = strrchr(path, '/'); + if (slash) + return slash + 1; + + return path; +} +// @LOCALMOD-END + static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, int *claimed); static ld_plugin_status all_symbols_read_hook(void); @@ -147,6 +189,10 @@ ld_plugin_status onload(ld_plugin_tv *tv) { output_name = tv->tv_u.tv_string; break; case LDPT_LINKER_OUTPUT: + // @LOCALMOD-BEGIN + linker_output = + static_cast<ld_plugin_output_file_type>(tv->tv_u.tv_val); + // @LOCALMOD-END switch (tv->tv_u.tv_val) { case LDPO_REL: // .o case LDPO_DYN: // .so @@ -210,7 +256,23 @@ ld_plugin_status onload(ld_plugin_tv *tv) { break; case LDPT_GET_VIEW: get_view = tv->tv_u.tv_get_view; + // @LOCALMOD-BEGIN + case LDPT_GET_OUTPUT_SONAME: + get_output_soname = tv->tv_u.tv_get_output_soname; break; + case LDPT_GET_NEEDED: + get_needed = tv->tv_u.tv_get_needed; + break; + case LDPT_GET_NUM_NEEDED: + get_num_needed = tv->tv_u.tv_get_num_needed; + break; + case LDPT_GET_WRAPPED: + get_wrapped = tv->tv_u.tv_get_wrapped; + break; + case LDPT_GET_NUM_WRAPPED: + 
get_num_wrapped = tv->tv_u.tv_get_num_wrapped; + break; + // @LOCALMOD-END case LDPT_MESSAGE: message = tv->tv_u.tv_message; break; @@ -228,6 +290,24 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } + // @LOCALMOD-BEGIN + // Parse extra command-line options + // Although lto_codegen provides a way to parse command-line arguments, + // we need the arguments to be parsed and applied before LTOModules are + // even created. In particular, this is needed because the + // "-add-nacl-read-tp-dependency" flag affects how modules are created. + if (!options::extra.empty()) { + for (std::vector<std::string>::iterator it = options::extra.begin(); + it != options::extra.end(); ++it) { + lto_add_command_line_option((*it).c_str()); + } + lto_parse_command_line_options(); + // We clear the options so that they don't get parsed again in + // lto_codegen_debug_options. + options::extra.clear(); + } + // @LOCALMOD-END + return LDPS_OK; } @@ -294,7 +374,21 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, ld_plugin_symbol &sym = cf.syms.back(); sym.name = const_cast<char *>(lto_module_get_symbol_name(M, i)); sym.name = strdup(sym.name); + // @LOCALMOD-BEGIN + // Localmods have disabled the use of the 'version' field for passing + // version information to Gold. Instead, the version is now transmitted as + // part of the 'name' field, which has the form "sym@VER" or "sym@@VER". + // This is nicer because it communicates one extra bit of information (@@ + // marks the default version), and allows us to access the real symbol + // name in all_symbols_read. + + // These fields are set by Gold to communicate the updated version info + // to the plugin. They are used in all_symbols_read_hook(). + // Initialize them for predictability. sym.version = NULL; + sym.is_default = false; + sym.dynfile = NULL; + // @LOCALMOD-END int scope = attrs & LTO_SYMBOL_SCOPE_MASK; switch (scope) { @@ -343,18 +437,45 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } cf.syms.reserve(cf.syms.size()); + // @LOCALMOD-BEGIN + bool is_shared = + (lto_module_get_output_format(M) == LTO_OUTPUT_FORMAT_SHARED); + const char* soname = lto_module_get_soname(M); + if (soname[0] == '\0') + soname = NULL; + // @LOCALMOD-END if (!cf.syms.empty()) { - if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0]) != LDPS_OK) { + if ((*add_symbols)(cf.handle, cf.syms.size(), &cf.syms[0], + is_shared, soname) != LDPS_OK) { // @LOCALMOD (*message)(LDPL_ERROR, "Unable to add symbols!"); return LDPS_ERR; } } - if (code_gen) - lto_codegen_add_module(code_gen, M); + // @LOCALMOD-BEGIN + // Do not merge the module if it's a PSO. + // If the PSO's soname is set, add it to DepLibs. + cf.is_linked_in = false; + if (code_gen) { + if (is_shared) { + if (soname && strlen(soname) > 0) { + DepLibs.push_back(soname); + } + } else { + if (options::gather_then_link) { + lto_codegen_gather_module_for_link(code_gen, M); + } else { + lto_codegen_add_module(code_gen, M); + } + cf.is_linked_in = true; + } + } - lto_module_dispose(M); + // With gather_then_link, the modules are disposed when linking. 
+ if (!options::gather_then_link) + lto_module_dispose(M); + // @LOCALMOD-END return LDPS_OK; } @@ -367,6 +488,12 @@ static ld_plugin_status all_symbols_read_hook(void) { std::ofstream api_file; assert(code_gen); + // @LOCALMOD-BEGIN + if (options::gather_then_link) { + lto_codegen_link_gathered_modules_and_dispose(code_gen); + } + // @LOCALMOD-END + if (options::generate_api_file) { api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc); if (!api_file.is_open()) { @@ -381,12 +508,45 @@ static ld_plugin_status all_symbols_read_hook(void) { continue; (*get_symbols)(I->handle, I->syms.size(), &I->syms[0]); for (unsigned i = 0, e = I->syms.size(); i != e; i++) { + // @LOCALMOD-BEGIN + // Don't process the symbols inside a dynamic object. + if (!I->is_linked_in) + continue; + // @LOCALMOD-END + if (I->syms[i].resolution == LDPR_PREVAILING_DEF) { + // @LOCALMOD-BEGIN + // Set the symbol version in the module. + if (linker_output != LDPO_REL && I->syms[i].version) { + // NOTE: This may change the name of the symbol, so it must happen + // before the call to lto_codegen_add_must_preserve_symbols() below. + I->syms[i].name = const_cast<char *>( + lto_codegen_set_symbol_def_version(code_gen, I->syms[i].name, + I->syms[i].version, + I->syms[i].is_default)); + } lto_codegen_add_must_preserve_symbol(code_gen, I->syms[i].name); + // @LOCALMOD-END if (options::generate_api_file) api_file << I->syms[i].name << "\n"; } + // @LOCALMOD-BEGIN + else if (linker_output != LDPO_REL && + (I->syms[i].resolution == LDPR_RESOLVED_DYN || + I->syms[i].resolution == LDPR_UNDEF)) { + // This symbol is provided by an external object. + // Set the version and source dynamic file for it. + const char *ver = I->syms[i].version; + const char *dynfile = I->syms[i].dynfile; + dynfile = get_basename(dynfile); + // NOTE: This may change the name of the symbol. + I->syms[i].name = const_cast<char *>( + lto_codegen_set_symbol_needed(code_gen, I->syms[i].name, + ver ? ver : "", + dynfile ? dynfile : "")); + } + // @LOCALMOD-END } } @@ -398,6 +558,11 @@ static ld_plugin_status all_symbols_read_hook(void) { if (!options::mcpu.empty()) lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); + // @LOCALMOD-BEGIN (COMMENT) + // "extra" will always be empty below, because we process the extra + // options earlier, at the end of onload(). + // @LOCALMOD-END + // Pass through extra options to the code generator. if (!options::extra.empty()) { for (std::vector<std::string>::iterator it = options::extra.begin(); @@ -406,6 +571,57 @@ static ld_plugin_status all_symbols_read_hook(void) { } } + // @LOCALMOD-BEGIN + // Store the linker output format into the bitcode. + lto_output_format format; + switch (linker_output) { + case LDPO_REL: + format = LTO_OUTPUT_FORMAT_OBJECT; + break; + case LDPO_DYN: + format = LTO_OUTPUT_FORMAT_SHARED; + break; + case LDPO_EXEC: + format = LTO_OUTPUT_FORMAT_EXEC; + break; + default: + (*message)(LDPL_FATAL, "Unknown linker output format (gold-plugin)"); + abort(); + break; + } + lto_codegen_set_merged_module_output_format(code_gen, format); + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // For -shared linking, store the soname into the bitcode. + if (linker_output == LDPO_DYN) { + const char *soname = (*get_output_soname)(); + lto_codegen_set_merged_module_soname(code_gen, soname); + } + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // Add the needed libraries to the bitcode. 
+ unsigned int num_needed = (*get_num_needed)(); + for (unsigned i=0; i < num_needed; ++i) { + const char *soname = (*get_needed)(i); + soname = get_basename(soname); + lto_codegen_add_merged_module_library_dep(code_gen, soname); + } + for (std::vector<std::string>::iterator I = DepLibs.begin(), + E = DepLibs.end(); I != E; ++I) { + lto_codegen_add_merged_module_library_dep(code_gen, I->c_str()); + } + // @LOCALMOD-END + + // @LOCALMOD-BEGIN + // Perform symbol wrapping. + unsigned int num_wrapped = (*get_num_wrapped)(); + for (unsigned i=0; i < num_wrapped; ++i) { + const char *sym = (*get_wrapped)(i); + lto_codegen_wrap_symbol_in_merged_module(code_gen, sym); + } + // @LOCALMOD-END if (options::generate_bc_file != options::BC_NO) { std::string path; if (options::generate_bc_file == options::BC_ONLY) diff --git a/tools/llvm-as/llvm-as.cpp b/tools/llvm-as/llvm-as.cpp index d6f191961d..bb9afce271 100644 --- a/tools/llvm-as/llvm-as.cpp +++ b/tools/llvm-as/llvm-as.cpp @@ -77,8 +77,11 @@ static void WriteOutputFile(const Module *M) { exit(1); } - if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) + // @LOCALMOD-BEGIN + if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) { WriteBitcodeToFile(M, Out->os()); + } + // @LOCALMOD-END // Declare success. Out->keep(); diff --git a/tools/llvm-dis/CMakeLists.txt b/tools/llvm-dis/CMakeLists.txt index 9f12ecb666..d9883a2147 100644 --- a/tools/llvm-dis/CMakeLists.txt +++ b/tools/llvm-dis/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS bitreader analysis) +set(LLVM_LINK_COMPONENTS bitreader naclbitreader analysis) add_llvm_tool(llvm-dis llvm-dis.cpp diff --git a/tools/llvm-dis/LLVMBuild.txt b/tools/llvm-dis/LLVMBuild.txt index 4525010c1f..cf1cbf7a40 100644 --- a/tools/llvm-dis/LLVMBuild.txt +++ b/tools/llvm-dis/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-dis parent = Tools -required_libraries = Analysis BitReader +required_libraries = Analysis BitReader NaClBitReader diff --git a/tools/llvm-dis/Makefile b/tools/llvm-dis/Makefile index aeeeed0d68..0719006a15 100644 --- a/tools/llvm-dis/Makefile +++ b/tools/llvm-dis/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-dis -LINK_COMPONENTS := bitreader analysis +LINK_COMPONENTS := bitreader naclbitreader analysis # This tool has no plugins, optimize startup time. TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp index 067955e5cc..db9ca40f45 100644 --- a/tools/llvm-dis/llvm-dis.cpp +++ b/tools/llvm-dis/llvm-dis.cpp @@ -19,10 +19,12 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Assembly/AssemblyAnnotationWriter.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" // @LOCALMOD #include "llvm/DebugInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/IRReader/IRReader.h" // @LOCALMOD #include "llvm/Support/CommandLine.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/FormattedStream.h" @@ -51,6 +53,23 @@ static cl::opt<bool> ShowAnnotations("show-annotations", cl::desc("Add informational comments to the .ll file")); +// @LOCALMOD-BEGIN +// Print bitcode metadata only, in text format. +// (includes output format, soname, and dependencies). 
+static cl::opt<bool> +DumpMetadata("dump-metadata", cl::desc("Dump bitcode metadata")); + +static cl::opt<NaClFileFormat> +InputFileFormat( + "bitcode-format", + cl::desc("Define format of input bitcode file:"), + cl::values( + clEnumValN(LLVMFormat, "llvm", "LLVM bitcode file (default)"), + clEnumValN(PNaClFormat, "pnacl", "PNaCl bitcode file"), + clEnumValEnd), + cl::init(LLVMFormat)); +// @LOCALMOD-END + namespace { static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { @@ -133,8 +152,22 @@ int main(int argc, char **argv) { DisplayFilename = "<stdin>"; else DisplayFilename = InputFilename; - M.reset(getStreamedBitcodeModule(DisplayFilename, streamer, Context, - &ErrorMessage)); + + // @LOCALMOD-BEGIN + switch (InputFileFormat) { + case LLVMFormat: + M.reset(getStreamedBitcodeModule(DisplayFilename, streamer, Context, + &ErrorMessage)); + break; + case PNaClFormat: + M.reset(getNaClStreamedBitcodeModule(DisplayFilename, streamer, Context, + &ErrorMessage)); + break; + default: + ErrorMessage = "Don't understand specified bitcode format"; + break; + } + // @LOCALMOD-END if(M.get() != 0 && M->MaterializeAllPermanently(&ErrorMessage)) { M.reset(); } @@ -154,7 +187,7 @@ int main(int argc, char **argv) { OutputFilename = "-"; if (OutputFilename.empty()) { // Unspecified output, infer it. - if (InputFilename == "-") { + if (InputFilename == "-" || DumpMetadata) { // @LOCALMOD OutputFilename = "-"; } else { const std::string &IFN = InputFilename; @@ -176,6 +209,14 @@ int main(int argc, char **argv) { return 1; } + // @LOCALMOD-BEGIN + if (DumpMetadata) { + M->dumpMeta(Out->os()); + Out->keep(); + return 0; + } + // @LOCALMOD-END + OwningPtr<AssemblyAnnotationWriter> Annotator; if (ShowAnnotations) Annotator.reset(new CommentWriter()); diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp index 2f45b4eae5..8108996996 100644 --- a/tools/llvm-extract/llvm-extract.cpp +++ b/tools/llvm-extract/llvm-extract.cpp @@ -22,6 +22,8 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" // @LOCALMOD +#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Regex.h" @@ -48,6 +50,18 @@ Force("f", cl::desc("Enable binary output on terminals")); static cl::opt<bool> DeleteFn("delete", cl::desc("Delete specified Globals from Module")); +// @LOCALMOD-BEGIN +static cl::opt<unsigned> +Divisor("divisor", + cl::init(0), + cl::desc("select GV by position (pos % divisor = remainder ")); + +static cl::opt<unsigned> +Remainder("remainder", + cl::init(0), + cl::desc("select GV by position (pos % divisor = remainder ")); +// @LOCALMOD-END + // ExtractFuncs - The functions to extract from the module. static cl::list<std::string> ExtractFuncs("func", cl::desc("Specify function to extract"), @@ -179,6 +193,24 @@ int main(int argc, char **argv) { } } + // @LOCALMOD-BEGIN + // Extract globals via modulo operation. + size_t count_globals = 0; + if (Divisor != 0) { + size_t pos = 0; + for (Module::global_iterator GV = M->global_begin(), E = M->global_end(); + GV != E; + GV++, pos++) { + if (pos % Divisor == Remainder) { + GVs.insert(&*GV); + } + } + dbgs() << "total globals: " << pos << "\n"; + count_globals = GVs.size(); + dbgs() << "selected globals: " << count_globals << "\n"; + } + // @LOCALMOD-END + // Figure out which functions we should extract. 
for (size_t i = 0, e = ExtractFuncs.size(); i != e; ++i) { GlobalValue *GV = M->getFunction(ExtractFuncs[i]); @@ -213,6 +245,22 @@ int main(int argc, char **argv) { } } + // @LOCALMOD-BEGIN + // Extract functions via modulo operation. + if (Divisor != 0) { + size_t pos = 0; + for (Module::iterator F = M->begin(), E = M->end(); + F != E; + F++, pos++) { + if (pos % Divisor == Remainder) { + GVs.insert(&*F); + } + } + dbgs() << "total functions: " << pos << "\n"; + dbgs() << "selected functions: " << GVs.size() - count_globals << "\n"; + } + // @LOCALMOD-END + // Materialize requisite global values. if (!DeleteFn) for (size_t i = 0, e = GVs.size(); i != e; ++i) { diff --git a/tools/llvm-link/CMakeLists.txt b/tools/llvm-link/CMakeLists.txt index 4df53564e1..a414e5ac7f 100644 --- a/tools/llvm-link/CMakeLists.txt +++ b/tools/llvm-link/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS linker bitreader bitwriter asmparser irreader) +set(LLVM_LINK_COMPONENTS linker bitreader bitwriter naclbitwriter asmparser irreader) add_llvm_tool(llvm-link llvm-link.cpp diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp index 4b01c33504..db2628fbf8 100644 --- a/tools/llvm-mc/llvm-mc.cpp +++ b/tools/llvm-mc/llvm-mc.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCNaCl.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/AsmLexer.h" #include "llvm/MC/MCRegisterInfo.h" @@ -450,6 +451,11 @@ int main(int argc, char **argv) { Str.reset(TheTarget->createMCObjectStreamer(TripleName, Ctx, *MAB, FOS, CE, RelaxAll, NoExecStack)); + // @LOCALMOD-BEGIN + Triple T(TripleName); + if (T.isOSNaCl()) + initializeNaClMCStreamer(*Str.get(), Ctx, T); + // @LOCALMOD-END } int Res = 1; diff --git a/tools/llvm-nm/CMakeLists.txt b/tools/llvm-nm/CMakeLists.txt index b6cd80b477..de06ca28d9 100644 --- a/tools/llvm-nm/CMakeLists.txt +++ b/tools/llvm-nm/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS archive bitreader object) +set(LLVM_LINK_COMPONENTS archive bitreader naclbitreader object) add_llvm_tool(llvm-nm llvm-nm.cpp diff --git a/tools/llvm-nm/LLVMBuild.txt b/tools/llvm-nm/LLVMBuild.txt index 38ecbfd2e6..baba9ca50b 100644 --- a/tools/llvm-nm/LLVMBuild.txt +++ b/tools/llvm-nm/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = llvm-nm parent = Tools -required_libraries = Archive BitReader Object +required_libraries = Archive BitReader NaClBitReader Object diff --git a/tools/llvm-nm/Makefile b/tools/llvm-nm/Makefile index d9cee98995..fe208a8f24 100644 --- a/tools/llvm-nm/Makefile +++ b/tools/llvm-nm/Makefile @@ -9,7 +9,7 @@ LEVEL := ../.. TOOLNAME := llvm-nm -LINK_COMPONENTS := archive bitreader object +LINK_COMPONENTS := archive bitreader naclbitreader object # This tool has no plugins, optimize startup time. 
TOOL_NO_EXPORTS := 1 diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp index a24aae6061..a4f9ab0730 100644 --- a/tools/llvm-nm/llvm-nm.cpp +++ b/tools/llvm-nm/llvm-nm.cpp @@ -19,6 +19,8 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Bitcode/Archive.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" // @LOCALMOD +#include "llvm/IRReader/IRReader.h" // @LOCALMOD #include "llvm/IR/Module.h" #include "llvm/Object/Archive.h" #include "llvm/Object/ObjectFile.h" @@ -122,6 +124,18 @@ namespace { bool MultipleFiles = false; std::string ToolName; + + // @LOCALMOD-BEGIN + cl::opt<NaClFileFormat> + InputFileFormat( + "bitcode-format", + cl::desc("Define format of input file:"), + cl::values( + clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"), + clEnumValN(PNaClFormat, "pnacl", "PNaCl bitcode file"), + clEnumValEnd), + cl::init(LLVMFormat)); + // @LOCALMOD-END } @@ -345,7 +359,20 @@ static void DumpSymbolNamesFromFile(std::string &Filename) { LLVMContext &Context = getGlobalContext(); std::string ErrorMessage; - if (magic == sys::fs::file_magic::bitcode) { + // @LOCALMOD-BEGIN + // Support parsing PNaCl bitcode files + if (InputFileFormat == PNaClFormat) { + Module *Result = NaClParseBitcodeFile(Buffer.get(), Context, &ErrorMessage); + if (Result) { + DumpSymbolNamesFromModule(Result); + delete Result; + } else { + error(ErrorMessage, Filename); + return; + } + } + // @LOCALMOD-END + else if (magic == sys::fs::file_magic::bitcode) { Module *Result = 0; Result = ParseBitcodeFile(Buffer.get(), Context, &ErrorMessage); if (Result) { diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 57e7a2d07f..75c718c019 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -98,6 +98,68 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { return ret; } +// @LOCALMOD-BEGIN +/// Add a module that will be merged with the final output module. +/// The merging does not happen until linkGatheredModulesAndDispose(). +void LTOCodeGenerator::gatherModuleForLinking(LTOModule* mod) { + _gatheredModules.push_back(mod); +} + +/// Merge all modules gathered from gatherModuleForLinking(), and +/// destroy the source modules in the process. +bool LTOCodeGenerator::linkGatheredModulesAndDispose(std::string& errMsg) { + + // We gather the asm undefs earlier than addModule() does, + // since we delete the modules during linking, and would not be + // able to do this after linking. The undefs vector contain lists + // of global variable names which are considered "used", which will be + // appended into the "llvm.compiler.used" list. The names must be the + // same before linking as they are after linking, since we have switched + // the order. + for (unsigned i = 0, ei = _gatheredModules.size(); i != ei; ++i) { + const std::vector<const char*> &undefs = + _gatheredModules[i]->getAsmUndefinedRefs(); + for (int j = 0, ej = undefs.size(); j != ej; ++j) { + _asmUndefinedRefs[undefs[j]] = 1; + } + } + + // Tree-reduce the mods, re-using the incoming mods as scratch + // intermediate results. Module i is linked with (i + stride), with i as + // the dest. We begin with a stride of 1, and double each time. E.g., + // after the first round, only the even-indexed modules are still available, + // and after the second, only those with index that are a multiple of 4 + // are available. Eventually the Module with the content of all other modules + // will be Module 0. 
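// An illustrative trace (a sketch, not part of the original change) for eight
// gathered modules, writing "i <- j" for linking module j into module i via
// Linker::LinkModules with Linker::DestroySource:
//
//   stride 1:   0 <- 1   2 <- 3   4 <- 5   6 <- 7
//   stride 2:   0 <- 2   4 <- 6
//   stride 4:   0 <- 4
//
// after which only module 0 remains, and it is handed to LinkInModule() below.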
+ // NOTE: we may be able to be smarter about linking if we did not do them + // pairwise using Linker::LinkModules. We also disregard module sizes + // and try our best to keep the modules in order (linking adjacent modules). + for (unsigned stride = 1, len = _gatheredModules.size(); + stride < len; + stride *= 2) { + for (unsigned i = 0; i + stride < len; i = i + (stride * 2)) { + if (Linker::LinkModules(_gatheredModules[i]->getLLVVMModule(), + _gatheredModules[i+stride]->getLLVVMModule(), + Linker::DestroySource, &errMsg)) { + errs() << "LinkModules " << i << " w/ " << i + stride << " failed...\n"; + // We leak the memory in this case... + return true; + } + delete _gatheredModules[i+stride]; + } + } + + // Finally, link Node 0 with the Dest and delete Node 0. + if (_linker.LinkInModule(_gatheredModules[0]->getLLVVMModule(), &errMsg)) { + errs() << "LinkModules Dst w/ _gatheredModules[0] failed...\n"; + return true; + } + delete _gatheredModules[0]; + + return false; +} +// @LOCALMOD-END + bool LTOCodeGenerator::setDebugInfo(lto_debug_model debug, std::string& errMsg) { switch (debug) { @@ -124,6 +186,83 @@ bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model, llvm_unreachable("Unknown PIC model!"); } +// @LOCALMOD-BEGIN +void LTOCodeGenerator::setMergedModuleOutputFormat(lto_output_format format) +{ + Module::OutputFormat outputFormat; + switch (format) { + case LTO_OUTPUT_FORMAT_OBJECT: + outputFormat = Module::ObjectOutputFormat; + break; + case LTO_OUTPUT_FORMAT_SHARED: + outputFormat = Module::SharedOutputFormat; + break; + case LTO_OUTPUT_FORMAT_EXEC: + outputFormat = Module::ExecutableOutputFormat; + break; + default: + llvm_unreachable("Unexpected output format"); + } + Module *mergedModule = _linker.getModule(); + mergedModule->setOutputFormat(outputFormat); +} + +void LTOCodeGenerator::setMergedModuleSOName(const char *soname) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->setSOName(soname); +} + +void LTOCodeGenerator::addLibraryDep(const char *lib) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->addLibrary(lib); +} + +void LTOCodeGenerator::wrapSymbol(const char *sym) +{ + Module *mergedModule = _linker.getModule(); + mergedModule->wrapSymbol(sym); +} + +const char* LTOCodeGenerator::setSymbolDefVersion(const char *sym, + const char *ver, + bool is_default) +{ + Module *mergedModule = _linker.getModule(); + GlobalValue *GV = mergedModule->getNamedValue(sym); + if (!GV) { + llvm_unreachable("Invalid global in setSymbolDefVersion"); + } + GV->setVersionDef(ver, is_default); + return strdup(GV->getName().str().c_str()); +} + +const char* LTOCodeGenerator::setSymbolNeeded(const char *sym, + const char *ver, + const char *dynfile) +{ + Module *mergedModule = _linker.getModule(); + GlobalValue *GV = mergedModule->getNamedValue(sym); + if (!GV) { + // Symbol lookup may have failed because this symbol was already + // renamed for versioning. Make sure this is the case. + if (strchr(sym, '@') != NULL || ver == NULL || ver[0] == '\0') { + llvm_unreachable("Unexpected condition in setSymbolNeeded"); + } + std::string NewName = std::string(sym) + "@" + ver; + GV = mergedModule->getNamedValue(NewName); + } + if (!GV) { + // Ignore failures due to unused declarations. + // This caused a falure to build libppruntime.so for glibc. + // TODO(sehr): better document under which circumstances this is needed. 
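// Hypothetical example of the fall-through tolerated here: a call like
// setSymbolNeeded("read", "GLIBC_2.9", "libc.so.6") first looks up "read",
// then retries with "read@GLIBC_2.9"; if both lookups fail, the declaration
// was never referenced, and the original name is returned unchanged.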
+ return sym; + } + GV->setNeeded(ver, dynfile); + return strdup(GV->getName().str().c_str()); +} +// @LOCALMOD-END bool LTOCodeGenerator::writeMergedModules(const char *path, std::string &errMsg) { if (determineTarget(errMsg)) @@ -142,7 +281,6 @@ bool LTOCodeGenerator::writeMergedModules(const char *path, return true; } - // write bitcode to it WriteBitcodeToFile(_linker.getModule(), Out.os()); Out.os().close(); diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h index a4ade9fd26..4cc3928340 100644 --- a/tools/lto/LTOCodeGenerator.h +++ b/tools/lto/LTOCodeGenerator.h @@ -41,6 +41,12 @@ struct LTOCodeGenerator { ~LTOCodeGenerator(); bool addModule(struct LTOModule*, std::string &errMsg); + // @LOCALMOD-BEGIN + // Alternative methods of adding modules, which delay merging modules until + // all modules are available. + void gatherModuleForLinking(struct LTOModule*); + bool linkGatheredModulesAndDispose(std::string &errMsg); + // @LOCALMOD-END bool setDebugInfo(lto_debug_model, std::string &errMsg); bool setCodePICModel(lto_codegen_model, std::string &errMsg); @@ -51,6 +57,16 @@ struct LTOCodeGenerator { } bool writeMergedModules(const char *path, std::string &errMsg); + // @LOCALMOD-BEGIN + void setMergedModuleOutputFormat(lto_output_format format); + void setMergedModuleSOName(const char *soname); + void addLibraryDep(const char *lib); + void wrapSymbol(const char *sym); + const char* setSymbolDefVersion(const char *sym, const char *ver, + bool is_default); + const char* setSymbolNeeded(const char *sym, const char *ver, + const char *dynfile); + // @LOCALMOD-END bool compile_to_file(const char **name, std::string &errMsg); const void *compile(size_t *length, std::string &errMsg); void setCodeGenDebugOptions(const char *opts); @@ -78,6 +94,9 @@ private: std::vector<char*> _codegenOptions; std::string _mCpu; std::string _nativeObjectPath; + + // @LOCALMOD + std::vector<LTOModule*> _gatheredModules; }; #endif // LTO_CODE_GENERATOR_H diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp index d805f49f9a..2f98517c1c 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -28,6 +28,7 @@ #include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" // @LOCALMOD #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -271,7 +272,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, } // parse bitcode buffer - OwningPtr<Module> m(getLazyBitcodeModule(buffer, getGlobalContext(), + OwningPtr<Module> m(ParseBitcodeFile(buffer, getGlobalContext(), // @LOCALMOD &errMsg)); if (!m) { delete buffer; @@ -304,6 +305,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, getTargetOptions(Options); TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, Options); + LTOModule *Ret = new LTOModule(m.take(), target); if (Ret->parseSymbols(errMsg)) { delete Ret; @@ -319,6 +321,36 @@ MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) { return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false); } +// @LOCALMOD-BEGIN +lto_output_format LTOModule::getOutputFormat() { + Module::OutputFormat format = _module->getOutputFormat(); + switch (format) { + case Module::ObjectOutputFormat: return LTO_OUTPUT_FORMAT_OBJECT; + case Module::SharedOutputFormat: return LTO_OUTPUT_FORMAT_SHARED; + case Module::ExecutableOutputFormat: return LTO_OUTPUT_FORMAT_EXEC; + } + 
llvm_unreachable("Unknown output format in LTOModule"); +} + +const char *LTOModule::getSOName() { + return _module->getSOName().c_str(); +} + +const char* LTOModule::getLibraryDep(uint32_t index) { + /* make it compile until we bring back deplibs + const Module::LibraryListType &Libs = _module->getLibraries(); + if (index < Libs.size()) + return Libs[index].c_str(); + */ + return NULL; +} + +uint32_t LTOModule::getNumLibraryDeps() { + //return _module->getLibraries().size(); + return 0; +} +// @LOCALMOD-END + /// objcClassNameFromExpression - Get string that the data pointer points to. bool LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { @@ -614,6 +646,14 @@ LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) { if (decl->getName().startswith("llvm.")) return; + // @LOCALMOD-BEGIN + // Bitcode modules may have declarations for functions or globals + // which are unused. Ignore them here so that gold does not mistake + // them for undefined symbols. + if (decl->use_empty()) + return; + // @LOCALMOD-END + // ignore all aliases if (isa<GlobalAlias>(decl)) return; @@ -800,6 +840,7 @@ namespace { unsigned MaxBytesToEmit) {} virtual bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value ) { return false; } + virtual void EmitFileDirective(StringRef Filename) {} virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, diff --git a/tools/lto/LTOModule.h b/tools/lto/LTOModule.h index 83f3a7def1..6f97699e90 100644 --- a/tools/lto/LTOModule.h +++ b/tools/lto/LTOModule.h @@ -99,6 +99,14 @@ public: _module->setTargetTriple(triple); } + // @LOCALMOD-BEGIN + lto_output_format getOutputFormat(); + const char* getSOName(); + const char* getLibraryDep(uint32_t index); + uint32_t getNumLibraryDeps(); + // @LOCALMOD-END + + /// getSymbolCount - Get the number of symbols uint32_t getSymbolCount() { return _symbols.size(); diff --git a/tools/lto/Makefile b/tools/lto/Makefile index ab2e16e5fa..c13a0ba7f6 100644 --- a/tools/lto/Makefile +++ b/tools/lto/Makefile @@ -57,3 +57,11 @@ ifeq ($(HOST_OS),Darwin) -Wl,-object_path_lto -Wl,$(TempFile) endif endif + +#@ LOCALMOD-BEGIN +# This is to fix an upstream bug. It is in the process of being upstreamed. +# This line can be removed after it has been fixed upstream and we've merged. +ifneq ($(HOST_OS),Darwin) + LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-soname=$(SharedPrefix)LTO$(SHLIBEXT) +endif +#@ LOCALMOD-END diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 11ad532be8..1915acbdae 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "llvm-c/lto.h" +#include "llvm/Support/CommandLine.h" // @LOCALMOD + #include "LTOCodeGenerator.h" #include "LTOModule.h" #include "llvm-c/Core.h" @@ -22,6 +24,25 @@ // *** Not thread safe *** static std::string sLastErrorString; +// @LOCALMOD-BEGIN +static std::vector<const char*> lto_options; +extern void lto_add_command_line_option(const char* opt) +{ + // ParseCommandLineOptions() expects argv[0] to be program name. + if (lto_options.empty()) + lto_options.push_back("libLTO"); + + lto_options.push_back(strdup(opt)); +} + +extern void lto_parse_command_line_options() +{ + if ( !lto_options.empty() ) + llvm::cl::ParseCommandLineOptions(lto_options.size(), + const_cast<char **>(<o_options[0])); +} +// @LOCALMOD-END + /// lto_get_version - Returns a printable string. 
extern const char* lto_get_version() { return LTOCodeGenerator::getVersionString(); @@ -106,6 +127,45 @@ void lto_module_set_target_triple(lto_module_t mod, const char *triple) { return mod->setTargetTriple(triple); } +// @LOCALMOD-BEGIN + +// +// Get the module format for this module +// +lto_output_format lto_module_get_output_format(lto_module_t mod) +{ + return mod->getOutputFormat(); +} + +// +// Get the module soname +// +const char* lto_module_get_soname(lto_module_t mod) +{ + return mod->getSOName(); +} + +// +// Get the i'th library dependency. +// Returns NULL if i >= lto_module_get_num_library_deps() +// +const char * +lto_module_get_library_dep(lto_module_t mod, unsigned int i) +{ + return mod->getLibraryDep(i); +} + +// +// Return the number of library dependencies of this module. +// +unsigned int +lto_module_get_num_library_deps(lto_module_t mod) +{ + return mod->getNumLibraryDeps(); +} + +// @LOCALMOD-END + /// lto_module_get_num_symbols - Returns the number of symbols in the object /// module. unsigned int lto_module_get_num_symbols(lto_module_t mod) { @@ -144,6 +204,16 @@ bool lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod) { return cg->addModule(mod, sLastErrorString); } +// @LOCALMOD-BEGIN +void lto_codegen_gather_module_for_link(lto_code_gen_t cg, lto_module_t mod) { + cg->gatherModuleForLinking(mod); +} + +bool lto_codegen_link_gathered_modules_and_dispose(lto_code_gen_t cg) { + return cg->linkGatheredModulesAndDispose(sLastErrorString); +} +// @LOCALMOD-END + /// lto_codegen_set_debug_model - Sets what if any format of debug info should /// be generated. Returns true on error (check lto_get_error_message() for /// details). @@ -182,6 +252,77 @@ void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, cg->addMustPreserveSymbol(symbol); } +// @LOCALMOD-BEGIN + +// +// Set the module format for the merged module +// +void lto_codegen_set_merged_module_output_format(lto_code_gen_t cg, + lto_output_format format) +{ + cg->setMergedModuleOutputFormat(format); +} + +// +// Set the module soname (for shared library bitcode) +// +void lto_codegen_set_merged_module_soname(lto_code_gen_t cg, + const char* soname) +{ + cg->setMergedModuleSOName(soname); +} + +// +// Add a library dependency to the linked bitcode module. +// +void lto_codegen_add_merged_module_library_dep(lto_code_gen_t cg, + const char* soname) +{ + cg->addLibraryDep(soname); +} + +// +// Apply symbol wrapping in the linked bitcode module. +// +void lto_codegen_wrap_symbol_in_merged_module(lto_code_gen_t cg, + const char* sym) { + cg->wrapSymbol(sym); +} + +// +// Set the symbol version of defined symbol 'sym'. +// 'sym' is the name of the GlobalValue, exactly as it is +// in the LLVM module. It may already have a version suffix. +// In that case, this function verifies that the old version +// and new version match. +// Returns a reference to the new name. +// +const char * +lto_codegen_set_symbol_def_version(lto_code_gen_t cg, + const char *sym, + const char *version, + bool is_default) { + return cg->setSymbolDefVersion(sym, version, is_default); +} + +// +// Set the symbol version of needed symbol 'sym' from file 'dynfile'. +// 'sym' is the name of the GlobalValue, exactly as it is +// in the LLVM module. It may already have a version suffix. +// In that case, this function verifies that the old version +// and new version match. +// In any case, it adds a NeededRecord entry. +// Returns a reference to the new name. 
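// A sketch of the expected call from a linker plugin (argument values are
// hypothetical, not taken from this patch):
//
//   const char *name =
//       lto_codegen_set_symbol_needed(cg, "read", "GLIBC_2.9", "libc.so.6");
//
// The caller should use the returned, possibly renamed, string for any
// subsequent symbol queries.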
+// +const char* +lto_codegen_set_symbol_needed(lto_code_gen_t cg, + const char *sym, + const char *version, + const char *dynfile) { + return cg->setSymbolNeeded(sym, version, dynfile); +} +// @LOCALMOD-END + /// lto_codegen_write_merged_modules - Writes a new file at the specified path /// that contains the merged contents of all modules added so far. Returns true /// on error (check lto_get_error_message() for details). diff --git a/tools/lto/lto.exports b/tools/lto/lto.exports index 46d0d74c82..10d2fe03f6 100644 --- a/tools/lto/lto.exports +++ b/tools/lto/lto.exports @@ -1,3 +1,5 @@ +lto_add_command_line_option +lto_parse_command_line_options lto_get_error_message lto_get_version lto_initialize_disassembler @@ -10,16 +12,25 @@ lto_module_get_symbol_attribute lto_module_get_symbol_name lto_module_get_target_triple lto_module_set_target_triple +lto_module_get_output_format +lto_module_get_soname +lto_module_get_library_dep +lto_module_get_num_library_deps lto_module_is_object_file lto_module_is_object_file_for_target lto_module_is_object_file_in_memory lto_module_is_object_file_in_memory_for_target lto_module_dispose lto_codegen_add_module +lto_codegen_gather_module_for_link +lto_codegen_link_gathered_modules_and_dispose lto_codegen_add_must_preserve_symbol lto_codegen_compile lto_codegen_create lto_codegen_dispose +lto_codegen_set_assembler_args +lto_codegen_set_assembler_path +lto_codegen_set_cpu lto_codegen_set_debug_model lto_codegen_set_pic_model lto_codegen_write_merged_modules @@ -27,6 +38,12 @@ lto_codegen_debug_options lto_codegen_set_assembler_args lto_codegen_set_assembler_path lto_codegen_set_cpu +lto_codegen_set_merged_module_output_format +lto_codegen_set_merged_module_soname +lto_codegen_add_merged_module_library_dep +lto_codegen_set_symbol_def_version +lto_codegen_set_symbol_needed +lto_codegen_wrap_symbol_in_merged_module lto_codegen_compile_to_file LLVMCreateDisasm LLVMCreateDisasmCPU diff --git a/tools/opt/CMakeLists.txt b/tools/opt/CMakeLists.txt index 91959119e4..b308fa7264 100644 --- a/tools/opt/CMakeLists.txt +++ b/tools/opt/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter irreader instrumentation scalaropts objcarcopts ipo vectorize) +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader asmparser bitwriter irreader naclbitwriter naclbitreader instrumentation naclanalysis nacltransforms scalaropts objcarcopts ipo vectorize) add_llvm_tool(opt AnalysisWrappers.cpp diff --git a/tools/opt/LLVMBuild.txt b/tools/opt/LLVMBuild.txt index 77b94469ed..6cf3a79219 100644 --- a/tools/opt/LLVMBuild.txt +++ b/tools/opt/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Tool name = opt parent = Tools -required_libraries = AsmParser BitReader BitWriter IRReader IPO Instrumentation Scalar ObjCARC all-targets +required_libraries = AsmParser BitReader BitWriter IRReader NaClBitWriter IPO Instrumentation Scalar ObjCARC all-targets NaClTransforms NaClAnalysis diff --git a/tools/opt/Makefile b/tools/opt/Makefile index a451005574..5413125972 100644 --- a/tools/opt/Makefile +++ b/tools/opt/Makefile @@ -9,6 +9,6 @@ LEVEL := ../.. 
TOOLNAME := opt -LINK_COMPONENTS := bitreader bitwriter asmparser irreader instrumentation scalaropts objcarcopts ipo vectorize all-targets +LINK_COMPONENTS := bitreader bitwriter naclbitwriter asmparser irreader instrumentation scalaropts objcarcopts ipo vectorize nacltransforms naclanalysis all-targets include $(LEVEL)/Makefile.common diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp index e385d7f577..1e8fb65e51 100644 --- a/tools/opt/opt.cpp +++ b/tools/opt/opt.cpp @@ -22,22 +22,23 @@ #include "llvm/Analysis/Verifier.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" // @LOCALMOD #include "llvm/CodeGen/CommandFlags.h" #include "llvm/DebugInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Module.h" -#include "llvm/IRReader/IRReader.h" +#include "llvm/IRReader/IRReader.h" // @LOCALMOD #include "llvm/LinkAllIR.h" #include "llvm/LinkAllPasses.h" #include "llvm/MC/SubtargetFeature.h" #include "llvm/PassManager.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/IRReader.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PassNameParser.h" #include "llvm/Support/PluginLoader.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" -#include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" @@ -45,6 +46,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/NaCl.h" // @LOCALMOD #include <algorithm> #include <memory> using namespace llvm; @@ -127,6 +129,18 @@ static cl::opt<bool> OptLevelO3("O3", cl::desc("Optimization level 3. 
Similar to clang -O3")); +// @LOCALMOD-BEGIN +static cl::opt<bool> +PNaClABISimplifyPreOpt( + "pnacl-abi-simplify-preopt", + cl::desc("PNaCl ABI simplifications for before optimizations")); + +static cl::opt<bool> +PNaClABISimplifyPostOpt( + "pnacl-abi-simplify-postopt", + cl::desc("PNaCl ABI simplifications for after optimizations")); +// @LOCALMOD-END + static cl::opt<std::string> TargetTriple("mtriple", cl::desc("Override target triple for module")); @@ -157,6 +171,18 @@ DefaultDataLayout("default-data-layout", cl::desc("data layout string to use if not specified by module"), cl::value_desc("layout-string"), cl::init("")); +// @LOCALMOD-BEGIN +static cl::opt<NaClFileFormat> +OutputFileFormat( + "bitcode-format", + cl::desc("Define format of generated bitcode file:"), + cl::values( + clEnumValN(LLVMFormat, "llvm", "LLVM bitcode file (default)"), + clEnumValN(PNaClFormat, "pnacl", "PNaCl bitcode file"), + clEnumValEnd), + cl::init(LLVMFormat)); +// @LOCALMOD-END + // ---------- Define Printers for module and function passes ------------ namespace { @@ -577,6 +603,34 @@ int main(int argc, char **argv) { initializeInstCombine(Registry); initializeInstrumentation(Registry); initializeTarget(Registry); + // @LOCALMOD-BEGIN + initializeAddPNaClExternalDeclsPass(Registry); + initializeCanonicalizeMemIntrinsicsPass(Registry); + initializeExpandArithWithOverflowPass(Registry); + initializeExpandByValPass(Registry); + initializeExpandConstantExprPass(Registry); + initializeExpandCtorsPass(Registry); + initializeExpandGetElementPtrPass(Registry); + initializeExpandSmallArgumentsPass(Registry); + initializeExpandStructRegsPass(Registry); + initializeExpandTlsPass(Registry); + initializeExpandTlsConstantExprPass(Registry); + initializeExpandVarArgsPass(Registry); + initializeFlattenGlobalsPass(Registry); + initializeGlobalCleanupPass(Registry); + initializeInsertDivideCheckPass(Registry); + initializePNaClABIVerifyFunctionsPass(Registry); + initializePNaClABIVerifyModulePass(Registry); + initializePromoteI1OpsPass(Registry); + initializePromoteIntegersPass(Registry); + initializeReplacePtrsWithIntsPass(Registry); + initializeResolveAliasesPass(Registry); + initializeResolvePNaClIntrinsicsPass(Registry); + initializeRewriteLLVMIntrinsicsPass(Registry); + initializeRewritePNaClLibraryCallsPass(Registry); + initializeStripAttributesPass(Registry); + initializeStripMetadataPass(Registry); + // @LOCALMOD-END cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .bc modular optimizer and analysis printer\n"); @@ -733,6 +787,20 @@ int main(int argc, char **argv) { OptLevelO3 = false; } + // @LOCALMOD-BEGIN + if (PNaClABISimplifyPreOpt && + PNaClABISimplifyPreOpt.getPosition() < PassList.getPosition(i)) { + PNaClABISimplifyAddPreOptPasses(Passes); + PNaClABISimplifyPreOpt = false; + } + + if (PNaClABISimplifyPostOpt && + PNaClABISimplifyPostOpt.getPosition() < PassList.getPosition(i)) { + PNaClABISimplifyAddPostOptPasses(Passes); + PNaClABISimplifyPostOpt = false; + } + // @LOCALMOD-END + const PassInfo *PassInf = PassList[i]; Pass *P = 0; if (PassInf->getNormalCtor()) @@ -805,6 +873,14 @@ int main(int argc, char **argv) { FPasses->doFinalization(); } + // @LOCALMOD-BEGIN + if (PNaClABISimplifyPreOpt) + PNaClABISimplifyAddPreOptPasses(Passes); + + if (PNaClABISimplifyPostOpt) + PNaClABISimplifyAddPostOptPasses(Passes); + // @LOCALMOD-END + // Check that the module is well formed on completion of optimization if (!NoVerify && !VerifyEach) Passes.add(createVerifierPass()); @@ -813,8 +889,7 @@ int main(int argc, 
char **argv) { if (!NoOutput && !AnalyzeOnly) { if (OutputAssembly) Passes.add(createPrintModulePass(&Out->os())); - else - Passes.add(createBitcodeWriterPass(Out->os())); + // @LOCALMOD } // Before executing passes, print the final values of the LLVM options. @@ -823,6 +898,23 @@ int main(int argc, char **argv) { // Now that we have all of the passes ready, run them. Passes.run(*M.get()); +// @LOCALMOD-BEGIN + // Write bitcode to the output. + if (!NoOutput && !AnalyzeOnly && !OutputAssembly) { + switch (OutputFileFormat) { + case LLVMFormat: + WriteBitcodeToFile(M.get(), Out->os()); + break; + case PNaClFormat: + NaClWriteBitcodeToFile(M.get(), Out->os()); + break; + default: + errs() << "Don't understand bitcode format for generated bitcode.\n"; + return 1; + } + } +// @LOCALMOD-END + // Declare success. if (!NoOutput || PrintBreakpoints) Out->keep(); diff --git a/tools/pnacl-abicheck/CMakeLists.txt b/tools/pnacl-abicheck/CMakeLists.txt new file mode 100644 index 0000000000..fda6d26ac8 --- /dev/null +++ b/tools/pnacl-abicheck/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitreader naclbitreader irreader asmparser naclanalysis) + +add_llvm_tool(pnacl-abicheck + pnacl-abicheck.cpp + ) diff --git a/tools/pnacl-abicheck/LLVMBuild.txt b/tools/pnacl-abicheck/LLVMBuild.txt new file mode 100644 index 0000000000..9e45f87f0a --- /dev/null +++ b/tools/pnacl-abicheck/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/pnacl-abicheck/LLVMBuild.txt ---------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pnacl-abicheck +parent = Tools +required_libraries = AsmParser BitReader NaClBitReader IRReader NaClAnalysis diff --git a/tools/pnacl-abicheck/Makefile b/tools/pnacl-abicheck/Makefile new file mode 100644 index 0000000000..97e2d22399 --- /dev/null +++ b/tools/pnacl-abicheck/Makefile @@ -0,0 +1,16 @@ +#===- tools/pnacl-abicheck/Makefile ------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pnacl-abicheck +LINK_COMPONENTS := bitreader asmparser naclbitreader irreader naclanalysis + +include $(LEVEL)/Makefile.common + + diff --git a/tools/pnacl-abicheck/pnacl-abicheck.cpp b/tools/pnacl-abicheck/pnacl-abicheck.cpp new file mode 100644 index 0000000000..8b96f17954 --- /dev/null +++ b/tools/pnacl-abicheck/pnacl-abicheck.cpp @@ -0,0 +1,87 @@ +//===-- pnacl-abicheck.cpp - Check PNaCl bitcode ABI ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This tool checks files for compliance with the PNaCl bitcode ABI +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Analysis/NaCl.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/IRReader.h" +#include <string> + +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); + +static cl::opt<bool> +Quiet("q", cl::desc("Do not print error messages")); + +static cl::opt<NaClFileFormat> +InputFileFormat( + "bitcode-format", + cl::desc("Define format of input file:"), + cl::values( + clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"), + clEnumValN(PNaClFormat, "pnacl", "PNaCl bitcode file"), + clEnumValEnd), + cl::init(LLVMFormat)); + +// Print any errors collected by the error reporter. Return true if +// there were any. +static bool CheckABIVerifyErrors(PNaClABIErrorReporter &Reporter, + const Twine &Name) { + bool HasErrors = Reporter.getErrorCount() > 0; + if (HasErrors) { + if (!Quiet) { + outs() << "ERROR: " << Name << " is not valid PNaCl bitcode:\n"; + Reporter.printErrors(outs()); + } + } + Reporter.reset(); + return HasErrors; +} + +int main(int argc, char **argv) { + LLVMContext &Context = getGlobalContext(); + SMDiagnostic Err; + cl::ParseCommandLineOptions(argc, argv, "PNaCl Bitcode ABI checker\n"); + + OwningPtr<Module> Mod( + NaClParseIRFile(InputFilename, InputFileFormat, Err, Context)); + if (Mod.get() == 0) { + Err.print(argv[0], errs()); + return 1; + } + PNaClABIErrorReporter ABIErrorReporter; + ABIErrorReporter.setNonFatal(); + bool ErrorsFound = false; + // Manually run the passes so we can tell the user which function had the + // error. No need for a pass manager since it's just one pass. + OwningPtr<ModulePass> ModuleChecker( + createPNaClABIVerifyModulePass(&ABIErrorReporter)); + ModuleChecker->runOnModule(*Mod); + ErrorsFound |= CheckABIVerifyErrors(ABIErrorReporter, "Module"); + OwningPtr<FunctionPass> FunctionChecker( + createPNaClABIVerifyFunctionsPass(&ABIErrorReporter)); + for (Module::iterator MI = Mod->begin(), ME = Mod->end(); MI != ME; ++MI) { + FunctionChecker->runOnFunction(*MI); + ErrorsFound |= CheckABIVerifyErrors(ABIErrorReporter, + "Function " + MI->getName()); + } + + return ErrorsFound ? 1 : 0; +} diff --git a/tools/pnacl-bcanalyzer/CMakeLists.txt b/tools/pnacl-bcanalyzer/CMakeLists.txt new file mode 100644 index 0000000000..0cf17b8886 --- /dev/null +++ b/tools/pnacl-bcanalyzer/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS naclbitreader) + +add_llvm_tool(pnacl-bcanalyzer + pnacl-bcanalyzer.cpp + ) diff --git a/tools/pnacl-bcanalyzer/LLVMBuild.txt b/tools/pnacl-bcanalyzer/LLVMBuild.txt new file mode 100644 index 0000000000..2944fca4b0 --- /dev/null +++ b/tools/pnacl-bcanalyzer/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/pnacl-bcanalyzer/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pnacl-bcanalyzer +parent = Tools +required_libraries = NaClBitReader diff --git a/tools/pnacl-bcanalyzer/Makefile b/tools/pnacl-bcanalyzer/Makefile new file mode 100644 index 0000000000..d3ec1a81a9 --- /dev/null +++ b/tools/pnacl-bcanalyzer/Makefile @@ -0,0 +1,17 @@ +##===- tools/pnacl-bcanalyzer/Makefile ---------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pnacl-bcanalyzer +LINK_COMPONENTS := naclbitreader + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/pnacl-bcanalyzer/pnacl-bcanalyzer.cpp b/tools/pnacl-bcanalyzer/pnacl-bcanalyzer.cpp new file mode 100644 index 0000000000..225827e47b --- /dev/null +++ b/tools/pnacl-bcanalyzer/pnacl-bcanalyzer.cpp @@ -0,0 +1,641 @@ +//===-- pnacl-bcanalyzer.cpp - Bitcode Analyzer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool may be invoked in the following manner: +// pnacl-bcanalyzer [options] - Read frozen PNaCl bitcode from stdin +// pnacl-bcanalyzer [options] x.bc - Read frozen PNaCl bitcode from the x.bc +// file +// +// Options: +// --help - Output information about command line switches +// --dump - Dump low-level bitcode structure in readable format +// +// This tool provides analytical information about a bitcode file. It is +// intended as an aid to developers of bitcode reading and writing software. It +// produces on std::out a summary of the bitcode file that shows various +// statistics about the contents of the file. By default this information is +// detailed and contains information about individual bitcode blocks and the +// functions in the module. +// The tool is also able to print a bitcode file in a straight forward text +// format that shows the containment and relationships of the information in +// the bitcode file (-dump option). 
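// For instance (hypothetical invocation combining options declared further
// down in this file):
//
//   pnacl-bcanalyzer -dump -operands-per-line=4 foo.pexe
//
// prints the low-level block/record structure, wrapping the dump after every
// four record operands.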
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pnacl-bcanalyzer" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include <algorithm> +#include <map> +using namespace llvm; + +static cl::opt<std::string> + InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); + +static cl::opt<bool> Dump("dump", cl::desc("Dump low level bitcode trace")); + +static cl::opt<unsigned> OpsPerLine( + "operands-per-line", + cl::desc("Number of operands to print per dump line. 0 implies " + "all operands will be printed on the same line (default)"), + cl::init(0)); + +//===----------------------------------------------------------------------===// +// Bitcode specific analysis. +//===----------------------------------------------------------------------===// + +static cl::opt<bool> NoHistogram("disable-histogram", + cl::desc("Do not print per-code histogram")); + +static cl::opt<bool> +NonSymbolic("non-symbolic", + cl::desc("Emit numeric info in dump even if" + " symbolic info is available")); + + +/// GetBlockName - Return a symbolic block name if known, otherwise return +/// null. +static const char *GetBlockName(unsigned BlockID, + const NaClBitstreamReader &StreamFile) { + // Standard blocks for all bitcode files. + if (BlockID < naclbitc::FIRST_APPLICATION_BLOCKID) { + if (BlockID == naclbitc::BLOCKINFO_BLOCK_ID) + return "BLOCKINFO_BLOCK"; + return 0; + } + + // Check to see if we have a blockinfo record for this block, with a name. + if (const NaClBitstreamReader::BlockInfo *Info = + StreamFile.getBlockInfo(BlockID)) { + if (!Info->Name.empty()) + return Info->Name.c_str(); + } + + switch (BlockID) { + default: return 0; + case naclbitc::MODULE_BLOCK_ID: return "MODULE_BLOCK"; + case naclbitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK"; + case naclbitc::PARAMATTR_GROUP_BLOCK_ID: return "PARAMATTR_GROUP_BLOCK_ID"; + case naclbitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID"; + case naclbitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK"; + case naclbitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK"; + case naclbitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB"; + case naclbitc::METADATA_BLOCK_ID: return "METADATA_BLOCK"; + case naclbitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK"; + case naclbitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID"; + case naclbitc::GLOBALVAR_BLOCK_ID: return "GLOBALVAR_BLOCK"; + } +} + +/// GetCodeName - Return a symbolic code name if known, otherwise return +/// null. +static const char *GetCodeName(unsigned CodeID, unsigned BlockID, + const NaClBitstreamReader &StreamFile) { + // Standard blocks for all bitcode files. 
+ if (BlockID < naclbitc::FIRST_APPLICATION_BLOCKID) { + if (BlockID == naclbitc::BLOCKINFO_BLOCK_ID) { + switch (CodeID) { + default: return 0; + case naclbitc::BLOCKINFO_CODE_SETBID: return "SETBID"; + case naclbitc::BLOCKINFO_CODE_BLOCKNAME: return "BLOCKNAME"; + case naclbitc::BLOCKINFO_CODE_SETRECORDNAME: return "SETRECORDNAME"; + } + } + return 0; + } + + // Check to see if we have a blockinfo record for this record, with a name. + if (const NaClBitstreamReader::BlockInfo *Info = + StreamFile.getBlockInfo(BlockID)) { + for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i) + if (Info->RecordNames[i].first == CodeID) + return Info->RecordNames[i].second.c_str(); + } + + switch (BlockID) { + default: return 0; + case naclbitc::MODULE_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::MODULE_CODE_VERSION: return "VERSION"; + case naclbitc::MODULE_CODE_TRIPLE: return "TRIPLE"; + case naclbitc::MODULE_CODE_DATALAYOUT: return "DATALAYOUT"; + case naclbitc::MODULE_CODE_ASM: return "ASM"; + case naclbitc::MODULE_CODE_SECTIONNAME: return "SECTIONNAME"; + case naclbitc::MODULE_CODE_DEPLIB: return "DEPLIB"; // FIXME: Remove in 4.0 + case naclbitc::MODULE_CODE_GLOBALVAR: return "GLOBALVAR"; + case naclbitc::MODULE_CODE_FUNCTION: return "FUNCTION"; + case naclbitc::MODULE_CODE_ALIAS: return "ALIAS"; + case naclbitc::MODULE_CODE_PURGEVALS: return "PURGEVALS"; + case naclbitc::MODULE_CODE_GCNAME: return "GCNAME"; + } + case naclbitc::PARAMATTR_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY"; + case naclbitc::PARAMATTR_CODE_ENTRY: return "ENTRY"; + case naclbitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY"; + } + case naclbitc::TYPE_BLOCK_ID_NEW: + switch (CodeID) { + default: return 0; + case naclbitc::TYPE_CODE_NUMENTRY: return "NUMENTRY"; + case naclbitc::TYPE_CODE_VOID: return "VOID"; + case naclbitc::TYPE_CODE_FLOAT: return "FLOAT"; + case naclbitc::TYPE_CODE_DOUBLE: return "DOUBLE"; + case naclbitc::TYPE_CODE_LABEL: return "LABEL"; + case naclbitc::TYPE_CODE_OPAQUE: return "OPAQUE"; + case naclbitc::TYPE_CODE_INTEGER: return "INTEGER"; + case naclbitc::TYPE_CODE_POINTER: return "POINTER"; + case naclbitc::TYPE_CODE_ARRAY: return "ARRAY"; + case naclbitc::TYPE_CODE_VECTOR: return "VECTOR"; + case naclbitc::TYPE_CODE_X86_FP80: return "X86_FP80"; + case naclbitc::TYPE_CODE_FP128: return "FP128"; + case naclbitc::TYPE_CODE_PPC_FP128: return "PPC_FP128"; + case naclbitc::TYPE_CODE_METADATA: return "METADATA"; + case naclbitc::TYPE_CODE_STRUCT_ANON: return "STRUCT_ANON"; + case naclbitc::TYPE_CODE_STRUCT_NAME: return "STRUCT_NAME"; + case naclbitc::TYPE_CODE_STRUCT_NAMED: return "STRUCT_NAMED"; + case naclbitc::TYPE_CODE_FUNCTION: return "FUNCTION"; + } + + case naclbitc::CONSTANTS_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::CST_CODE_SETTYPE: return "SETTYPE"; + case naclbitc::CST_CODE_NULL: return "NULL"; + case naclbitc::CST_CODE_UNDEF: return "UNDEF"; + case naclbitc::CST_CODE_INTEGER: return "INTEGER"; + case naclbitc::CST_CODE_WIDE_INTEGER: return "WIDE_INTEGER"; + case naclbitc::CST_CODE_FLOAT: return "FLOAT"; + case naclbitc::CST_CODE_AGGREGATE: return "AGGREGATE"; + case naclbitc::CST_CODE_STRING: return "STRING"; + case naclbitc::CST_CODE_CSTRING: return "CSTRING"; + case naclbitc::CST_CODE_CE_BINOP: return "CE_BINOP"; + case naclbitc::CST_CODE_CE_CAST: return "CE_CAST"; + case naclbitc::CST_CODE_CE_GEP: return "CE_GEP"; + case naclbitc::CST_CODE_CE_INBOUNDS_GEP: return 
"CE_INBOUNDS_GEP"; + case naclbitc::CST_CODE_CE_SELECT: return "CE_SELECT"; + case naclbitc::CST_CODE_CE_EXTRACTELT: return "CE_EXTRACTELT"; + case naclbitc::CST_CODE_CE_INSERTELT: return "CE_INSERTELT"; + case naclbitc::CST_CODE_CE_SHUFFLEVEC: return "CE_SHUFFLEVEC"; + case naclbitc::CST_CODE_CE_CMP: return "CE_CMP"; + case naclbitc::CST_CODE_INLINEASM: return "INLINEASM"; + case naclbitc::CST_CODE_CE_SHUFVEC_EX: return "CE_SHUFVEC_EX"; + case naclbitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS"; + case naclbitc::CST_CODE_DATA: return "DATA"; + } + case naclbitc::FUNCTION_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::FUNC_CODE_DECLAREBLOCKS: return "DECLAREBLOCKS"; + + case naclbitc::FUNC_CODE_INST_BINOP: return "INST_BINOP"; + case naclbitc::FUNC_CODE_INST_CAST: return "INST_CAST"; + case naclbitc::FUNC_CODE_INST_GEP: return "INST_GEP"; + case naclbitc::FUNC_CODE_INST_INBOUNDS_GEP: return "INST_INBOUNDS_GEP"; + case naclbitc::FUNC_CODE_INST_SELECT: return "INST_SELECT"; + case naclbitc::FUNC_CODE_INST_EXTRACTELT: return "INST_EXTRACTELT"; + case naclbitc::FUNC_CODE_INST_INSERTELT: return "INST_INSERTELT"; + case naclbitc::FUNC_CODE_INST_SHUFFLEVEC: return "INST_SHUFFLEVEC"; + case naclbitc::FUNC_CODE_INST_CMP: return "INST_CMP"; + + case naclbitc::FUNC_CODE_INST_RET: return "INST_RET"; + case naclbitc::FUNC_CODE_INST_BR: return "INST_BR"; + case naclbitc::FUNC_CODE_INST_SWITCH: return "INST_SWITCH"; + case naclbitc::FUNC_CODE_INST_INVOKE: return "INST_INVOKE"; + case naclbitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE"; + + case naclbitc::FUNC_CODE_INST_PHI: return "INST_PHI"; + case naclbitc::FUNC_CODE_INST_ALLOCA: return "INST_ALLOCA"; + case naclbitc::FUNC_CODE_INST_LOAD: return "INST_LOAD"; + case naclbitc::FUNC_CODE_INST_VAARG: return "INST_VAARG"; + case naclbitc::FUNC_CODE_INST_STORE: return "INST_STORE"; + case naclbitc::FUNC_CODE_INST_EXTRACTVAL: return "INST_EXTRACTVAL"; + case naclbitc::FUNC_CODE_INST_INSERTVAL: return "INST_INSERTVAL"; + case naclbitc::FUNC_CODE_INST_CMP2: return "INST_CMP2"; + case naclbitc::FUNC_CODE_INST_VSELECT: return "INST_VSELECT"; + case naclbitc::FUNC_CODE_DEBUG_LOC_AGAIN: return "DEBUG_LOC_AGAIN"; + case naclbitc::FUNC_CODE_INST_CALL: return "INST_CALL"; + case naclbitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC"; + case naclbitc::FUNC_CODE_INST_FORWARDTYPEREF: return "FORWARDTYPEREF"; + } + case naclbitc::VALUE_SYMTAB_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::VST_CODE_ENTRY: return "ENTRY"; + case naclbitc::VST_CODE_BBENTRY: return "BBENTRY"; + } + case naclbitc::METADATA_ATTACHMENT_ID: + switch(CodeID) { + default:return 0; + case naclbitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT"; + } + case naclbitc::METADATA_BLOCK_ID: + switch(CodeID) { + default:return 0; + case naclbitc::METADATA_STRING: return "METADATA_STRING"; + case naclbitc::METADATA_NAME: return "METADATA_NAME"; + case naclbitc::METADATA_KIND: return "METADATA_KIND"; + case naclbitc::METADATA_NODE: return "METADATA_NODE"; + case naclbitc::METADATA_FN_NODE: return "METADATA_FN_NODE"; + case naclbitc::METADATA_NAMED_NODE: return "METADATA_NAMED_NODE"; + } + case naclbitc::USELIST_BLOCK_ID: + switch(CodeID) { + default:return 0; + case naclbitc::USELIST_CODE_ENTRY: return "USELIST_CODE_ENTRY"; + } + case naclbitc::GLOBALVAR_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::GLOBALVAR_VAR: return "VAR"; + case naclbitc::GLOBALVAR_COMPOUND: return "COMPOUND"; + case naclbitc::GLOBALVAR_ZEROFILL: 
return "ZEROFILL"; + case naclbitc::GLOBALVAR_DATA: return "DATA"; + case naclbitc::GLOBALVAR_RELOC: return "RELOC"; + case naclbitc::GLOBALVAR_COUNT: return "COUNT"; + } + } +} + +struct PerRecordStats { + unsigned NumInstances; + unsigned NumAbbrev; + uint64_t TotalBits; + + PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {} +}; + +struct PerBlockIDStats { + /// NumInstances - This the number of times this block ID has been seen. + unsigned NumInstances; + + /// NumBits - The total size in bits of all of these blocks. + uint64_t NumBits; + + /// NumSubBlocks - The total number of blocks these blocks contain. + unsigned NumSubBlocks; + + /// NumAbbrevs - The total number of abbreviations. + unsigned NumAbbrevs; + + /// NumRecords - The total number of records these blocks contain, and the + /// number that are abbreviated. + unsigned NumRecords, NumAbbreviatedRecords; + + /// CodeFreq - Keep track of the number of times we see each code. + std::vector<PerRecordStats> CodeFreq; + + PerBlockIDStats() + : NumInstances(0), NumBits(0), + NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {} +}; + +static std::map<unsigned, PerBlockIDStats> BlockIDStats; + + + +/// Error - All bitcode analysis errors go through this function, making this a +/// good place to breakpoint if debugging. +static bool Error(const std::string &Err) { + errs() << Err << "\n"; + return true; +} + +/// ParseBlock - Read a block, updating statistics, etc. +static bool ParseBlock(NaClBitstreamCursor &Stream, unsigned BlockID, + unsigned IndentLevel) { + std::string Indent(IndentLevel*2, ' '); + DEBUG(dbgs() << Indent << "-> ParseBlock(" << BlockID << ")\n"); + uint64_t BlockBitStart = Stream.GetCurrentBitNo(); + + // Get the statistics for this BlockID. + PerBlockIDStats &BlockStats = BlockIDStats[BlockID]; + + BlockStats.NumInstances++; + + // BLOCKINFO is a special part of the stream. + if (BlockID == naclbitc::BLOCKINFO_BLOCK_ID) { + if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n"; + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); + BlockStats.NumBits += BlockBitEnd-BlockBitStart; + DEBUG(dbgs() << Indent << "<- ParseBlock\n"); + return false; + } + + unsigned NumWords = 0; + if (Stream.EnterSubBlock(BlockID, &NumWords)) + return Error("Malformed block record"); + + const char *BlockName = 0; + if (Dump) { + outs() << Indent << "<"; + if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader()))) + outs() << BlockName; + else + outs() << "UnknownBlock" << BlockID; + + if (NonSymbolic && BlockName) + outs() << " BlockID=" << BlockID; + + outs() << " NumWords=" << NumWords + << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; + } + + SmallVector<uint64_t, 64> Record; + + // Read all the records for this block. 
+ while (1) { + if (Stream.AtEndOfStream()) + return Error("Premature end of bitstream"); + + uint64_t RecordStartBit = Stream.GetCurrentBitNo(); + + NaClBitstreamEntry Entry = + Stream.advance(NaClBitstreamCursor::AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + return Error("malformed bitcode file"); + case NaClBitstreamEntry::EndBlock: { + uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); + BlockStats.NumBits += BlockBitEnd-BlockBitStart; + if (Dump) { + outs() << Indent << "</"; + if (BlockName) + outs() << BlockName << ">\n"; + else + outs() << "UnknownBlock" << BlockID << ">\n"; + } + DEBUG(dbgs() << Indent << "<- ParseBlock\n"); + return false; + } + + case NaClBitstreamEntry::SubBlock: { + uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); + if (ParseBlock(Stream, Entry.ID, IndentLevel+1)) + return true; + ++BlockStats.NumSubBlocks; + uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); + + // Don't include subblock sizes in the size of this block. + BlockBitStart += SubBlockBitEnd-SubBlockBitStart; + continue; + } + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + if (Entry.ID == naclbitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + ++BlockStats.NumAbbrevs; + continue; + } + + Record.clear(); + + ++BlockStats.NumRecords; + + StringRef Blob; + unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); + + // Increment the # occurrences of this code. + if (BlockStats.CodeFreq.size() <= Code) + BlockStats.CodeFreq.resize(Code+1); + BlockStats.CodeFreq[Code].NumInstances++; + BlockStats.CodeFreq[Code].TotalBits += + Stream.GetCurrentBitNo()-RecordStartBit; + if (Entry.ID != naclbitc::UNABBREV_RECORD) { + BlockStats.CodeFreq[Code].NumAbbrev++; + ++BlockStats.NumAbbreviatedRecords; + } + + if (Dump) { + outs() << Indent << " <"; + const char *CodeName = + GetCodeName(Code, BlockID, *Stream.getBitStreamReader()); + if (CodeName) + outs() << CodeName; + else + outs() << "UnknownCode" << Code; + if (NonSymbolic && CodeName) + outs() << " codeid=" << Code; + if (Entry.ID != naclbitc::UNABBREV_RECORD) + outs() << " abbrevid=" << Entry.ID; + + for (unsigned i = 0, e = Record.size(); i != e; ++i) { + if (OpsPerLine && (i % OpsPerLine) == 0 && i > 0) { + outs() << "\n" << Indent << " "; + if (CodeName) { + for (unsigned j = 0; j < strlen(CodeName); ++j) + outs() << " "; + } else { + outs() << " "; + } + } + outs() << " op" << i << "=" << (int64_t)Record[i]; + } + + outs() << "/>"; + + if (Blob.data()) { + outs() << " blob data = "; + bool BlobIsPrintable = true; + for (unsigned i = 0, e = Blob.size(); i != e; ++i) + if (!isprint(static_cast<unsigned char>(Blob[i]))) { + BlobIsPrintable = false; + break; + } + + if (BlobIsPrintable) + outs() << "'" << Blob << "'"; + else + outs() << "unprintable, " << Blob.size() << " bytes."; + } + + outs() << "\n"; + } + } +} + +static void PrintSize(double Bits) { + outs() << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32)); +} +static void PrintSize(uint64_t Bits) { + outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits, + (double)Bits/8, (unsigned long)(Bits/32)); +} + + +/// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename. +static int AnalyzeBitcode() { + DEBUG(dbgs() << "-> AnalyzeBitcode\n"); + // Read the input file. 
+ OwningPtr<MemoryBuffer> MemBuf; + + if (error_code ec = + MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), MemBuf)) + return Error("Error reading '" + InputFilename + "': " + ec.message()); + + if (MemBuf->getBufferSize() & 3) + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + + const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); + const unsigned char *EndBufPtr = BufPtr+MemBuf->getBufferSize(); + + NaClBitcodeHeader Header; + if (Header.Read(BufPtr, EndBufPtr)) + return Error("Invalid PNaCl bitcode header"); + + if (!Header.IsSupported()) + errs() << "Warning: " << Header.Unsupported() << "\n"; + + if (!Header.IsReadable()) + Error("Bitcode file is not readable"); + + NaClBitstreamReader StreamFile(BufPtr, EndBufPtr); + NaClBitstreamCursor Stream(StreamFile); + StreamFile.CollectBlockInfoNames(); + + unsigned NumTopBlocks = 0; + + // Print out header information. + for (size_t i = 0, limit = Header.NumberFields(); i < limit; ++i) { + outs() << Header.GetField(i)->Contents() << "\n"; + } + if (Header.NumberFields()) outs() << "\n"; + + // Parse the top-level structure. We only allow blocks at the top-level. + while (!Stream.AtEndOfStream()) { + unsigned Code = Stream.ReadCode(); + if (Code != naclbitc::ENTER_SUBBLOCK) + return Error("Invalid record at top-level"); + + unsigned BlockID = Stream.ReadSubBlockID(); + + if (ParseBlock(Stream, BlockID, 0)) + return true; + ++NumTopBlocks; + } + + if (Dump) outs() << "\n\n"; + + uint64_t BufferSizeBits = (EndBufPtr-BufPtr)*CHAR_BIT; + // Print a summary of the read file. + outs() << "Summary of " << InputFilename << ":\n"; + outs() << " Total size: "; + PrintSize(BufferSizeBits); + outs() << "\n"; + outs() << " # Toplevel Blocks: " << NumTopBlocks << "\n"; + outs() << "\n"; + + // Emit per-block stats. + outs() << "Per-block Summary:\n"; + for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(), + E = BlockIDStats.end(); I != E; ++I) { + outs() << " Block ID #" << I->first; + if (const char *BlockName = GetBlockName(I->first, StreamFile)) + outs() << " (" << BlockName << ")"; + outs() << ":\n"; + + const PerBlockIDStats &Stats = I->second; + outs() << " Num Instances: " << Stats.NumInstances << "\n"; + outs() << " Total Size: "; + PrintSize(Stats.NumBits); + outs() << "\n"; + double pct = (Stats.NumBits * 100.0) / BufferSizeBits; + outs() << " Percent of file: " << format("%2.4f%%", pct) << "\n"; + if (Stats.NumInstances > 1) { + outs() << " Average Size: "; + PrintSize(Stats.NumBits/(double)Stats.NumInstances); + outs() << "\n"; + outs() << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" + << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n"; + outs() << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" + << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n"; + outs() << " Tot/Avg Records: " << Stats.NumRecords << "/" + << Stats.NumRecords/(double)Stats.NumInstances << "\n"; + } else { + outs() << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; + outs() << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; + outs() << " Num Records: " << Stats.NumRecords << "\n"; + } + if (Stats.NumRecords) { + double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; + outs() << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; + } + outs() << "\n"; + + // Print a histogram of the codes we see. 
+ if (!NoHistogram && !Stats.CodeFreq.empty()) { + std::vector<std::pair<unsigned, unsigned> > FreqPairs; // <freq,code> + for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) + if (unsigned Freq = Stats.CodeFreq[i].NumInstances) + FreqPairs.push_back(std::make_pair(Freq, i)); + std::stable_sort(FreqPairs.begin(), FreqPairs.end()); + std::reverse(FreqPairs.begin(), FreqPairs.end()); + + outs() << "\tRecord Histogram:\n"; + outs() << "\t\t Count # Bits %% Abv Record Kind\n"; + for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { + const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second]; + + outs() << format("\t\t%7d %9lu", + RecStats.NumInstances, + (unsigned long)RecStats.TotalBits); + + if (RecStats.NumAbbrev) + outs() << + format("%7.2f ", + (double)RecStats.NumAbbrev/RecStats.NumInstances*100); + else + outs() << " "; + + if (const char *CodeName = + GetCodeName(FreqPairs[i].second, I->first, StreamFile)) + outs() << CodeName << "\n"; + else + outs() << "UnknownCode" << FreqPairs[i].second << "\n"; + } + outs() << "\n"; + + } + } + DEBUG(dbgs() << "<- AnalyzeBitcode\n"); + return 0; +} + + +int main(int argc, char **argv) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + cl::ParseCommandLineOptions(argc, argv, "pnacl-bcanalyzer file analyzer\n"); + + return AnalyzeBitcode(); +} diff --git a/tools/pnacl-freeze/CMakeLists.txt b/tools/pnacl-freeze/CMakeLists.txt new file mode 100644 index 0000000000..fca58c7d5a --- /dev/null +++ b/tools/pnacl-freeze/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitreader naclbitwriter naclbitreader) + +add_llvm_tool(pnacl-freeze + pnacl-freeze.cpp + ) diff --git a/tools/pnacl-freeze/LLVMBuild.txt b/tools/pnacl-freeze/LLVMBuild.txt new file mode 100644 index 0000000000..8e3499b991 --- /dev/null +++ b/tools/pnacl-freeze/LLVMBuild.txt @@ -0,0 +1,16 @@ +;===- ./tools/pnacl-freeze/LLVMBuild.txt -----------------------*- Conf -*--===; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pnacl-freeze +parent = Tools +required_libraries = NaClBitWriter BitReader diff --git a/tools/pnacl-freeze/Makefile b/tools/pnacl-freeze/Makefile new file mode 100644 index 0000000000..5872f1cd15 --- /dev/null +++ b/tools/pnacl-freeze/Makefile @@ -0,0 +1,17 @@ +##===- tools/pnacl-freeze/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pnacl-freeze +LINK_COMPONENTS := naclbitwriter bitreader + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/pnacl-freeze/pnacl-freeze.cpp b/tools/pnacl-freeze/pnacl-freeze.cpp new file mode 100644 index 0000000000..297edb85a7 --- /dev/null +++ b/tools/pnacl-freeze/pnacl-freeze.cpp @@ -0,0 +1,95 @@ +/* Copyright 2013 The Native Client Authors. All rights reserved. 
+ * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +//===-- pnacl-freeze.cpp - The low-level NaCl bitcode freezer --------===// +// +//===----------------------------------------------------------------------===// +// +// Generates NaCl pexe wire format. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/LLVMContext.h" +// Note: We need the following to provide the API for calling the NaCl +// Bitcode Writer to generate the frozen file. +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +// Note: We need the following to provide the API for calling the (LLVM) +// Bitcode Reader to read in the corresonding pexe file to freeze. +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataStream.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/ToolOutputFile.h" + +using namespace llvm; + + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Specify output filename"), + cl::value_desc("filename"), cl::init("-")); + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<pexe file>"), cl::init("-")); + +static void WriteOutputFile(const Module *M) { + + std::string ErrorInfo; + OwningPtr<tool_output_file> Out + (new tool_output_file(OutputFilename.c_str(), ErrorInfo, + raw_fd_ostream::F_Binary)); + if (!ErrorInfo.empty()) { + errs() << ErrorInfo << '\n'; + exit(1); + } + + NaClWriteBitcodeToFile(M, Out->os()); + + // Declare success. + Out->keep(); +} + +int main(int argc, char **argv) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + + LLVMContext &Context = getGlobalContext(); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + + cl::ParseCommandLineOptions(argc, argv, "Generates NaCl pexe wire format\n"); + + std::string ErrorMessage; + std::auto_ptr<Module> M; + + // Use the bitcode streaming interface + DataStreamer *streamer = getDataFileStreamer(InputFilename, &ErrorMessage); + if (streamer) { + std::string DisplayFilename; + if (InputFilename == "-") + DisplayFilename = "<stdin>"; + else + DisplayFilename = InputFilename; + M.reset(getStreamedBitcodeModule(DisplayFilename, streamer, Context, + &ErrorMessage)); + if(M.get() != 0 && M->MaterializeAllPermanently(&ErrorMessage)) { + M.reset(); + } + } + + if (M.get() == 0) { + errs() << argv[0] << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "bitcode didn't read correctly.\n"; + return 1; + } + + WriteOutputFile(M.get()); + return 0; +} diff --git a/tools/pnacl-llc/CMakeLists.txt b/tools/pnacl-llc/CMakeLists.txt new file mode 100644 index 0000000000..9e53a28aff --- /dev/null +++ b/tools/pnacl-llc/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS ${LLVM_TARGETS_TO_BUILD} bitreader naclbitreader + irreader asmparser naclanalysis) + +add_llvm_tool(pnacl-llc +# This file provides wrappers to lseek(2), read(2), etc. 
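+# (nacl_file.cpp; it also hosts the SRPC entry points for the sandboxed translator.)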
+ nacl_file.cpp + SRPCStreamer.cpp + pnacl-llc.cpp + ) diff --git a/tools/pnacl-llc/LLVMBuild.txt b/tools/pnacl-llc/LLVMBuild.txt new file mode 100644 index 0000000000..8d441f5a70 --- /dev/null +++ b/tools/pnacl-llc/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/pnacl-llc/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pnacl-llc +parent = Tools +required_libraries = AsmParser BitReader NaClBitReader IRReader all-targets NaClAnalysis diff --git a/tools/pnacl-llc/Makefile b/tools/pnacl-llc/Makefile new file mode 100644 index 0000000000..bf4a0e8be8 --- /dev/null +++ b/tools/pnacl-llc/Makefile @@ -0,0 +1,16 @@ +#===- tools/pnacl-llc/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pnacl-llc +LINK_COMPONENTS := all-targets bitreader naclbitreader irreader \ + asmparser naclanalysis nacltransforms + +include $(LEVEL)/Makefile.common + diff --git a/tools/pnacl-llc/SRPCStreamer.cpp b/tools/pnacl-llc/SRPCStreamer.cpp new file mode 100644 index 0000000000..c41650c89a --- /dev/null +++ b/tools/pnacl-llc/SRPCStreamer.cpp @@ -0,0 +1,142 @@ +//===-- SRPCStreamer.cpp - Stream bitcode over SRPC ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// +// +//===----------------------------------------------------------------------===// + +#if defined(__native_client__) +#define DEBUG_TYPE "bitcode-stream" +#include "SRPCStreamer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include <errno.h> + +using llvm::dbgs; + +const size_t QueueStreamer::queuesize_limit_; + +size_t QueueStreamer::GetBytes(unsigned char *buf, size_t len) { + size_t total_copied = 0; + pthread_mutex_lock(&Mutex); + while (!Done && queueSize() < len - total_copied) { + size_t size = queueSize(); + DEBUG(dbgs() << "QueueStreamer::GetBytes len " << len << " size " << + size << " << waiting\n"); + queueGet(buf + total_copied, size); + total_copied += size; + pthread_cond_signal(&Cond); + pthread_cond_wait(&Cond, &Mutex); + } + // If this is the last partial chunk, adjust len such that the amount we + // fetch will be just the remaining bytes. 
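+  // (Done means SetDone() has already run, so no more data will ever arrive.)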
+ if (Done && queueSize() < len - total_copied) { + len = queueSize() + total_copied; + } + queueGet(buf + total_copied, len - total_copied); + pthread_cond_signal(&Cond); + pthread_mutex_unlock(&Mutex); + return len; +} + +size_t QueueStreamer::PutBytes(unsigned char *buf, size_t len) { + size_t total_copied = 0; + pthread_mutex_lock(&Mutex); + while (capacityRemaining() < len - total_copied) { + if (Bytes.size() * 2 > queuesize_limit_) { + size_t space = capacityRemaining(); + queuePut(buf + total_copied, space); + total_copied += space; + pthread_cond_signal(&Cond); + pthread_cond_wait(&Cond, &Mutex); + } else { + queueResize(); + } + } + queuePut(buf + total_copied, len - total_copied); + pthread_cond_signal(&Cond); + pthread_mutex_unlock(&Mutex); + return len; +} + +void QueueStreamer::SetDone() { + // Still need the lock to avoid signaling between the check and + // the wait in GetBytes. + pthread_mutex_lock(&Mutex); + Done = true; + pthread_cond_signal(&Cond); + pthread_mutex_unlock(&Mutex); +} + +// Double the size of the queue. Called with Mutex to protect Cons/Prod/Bytes. +void QueueStreamer::queueResize() { + int leftover = Bytes.size() - Cons; + DEBUG(dbgs() << "resizing to " << Bytes.size() * 2 << " " << leftover << " " + << Prod << " " << Cons << "\n"); + Bytes.resize(Bytes.size() * 2); + if (Cons > Prod) { + // There are unread bytes left between Cons and the previous end of the + // buffer. Move them to the new end of the buffer. + memmove(&Bytes[Bytes.size() - leftover], &Bytes[Cons], leftover); + Cons = Bytes.size() - leftover; + } +} + +// Called with Mutex held to protect Cons, Prod, and Bytes +void QueueStreamer::queuePut(unsigned char *buf, size_t len) { + size_t EndSpace = std::min(len, Bytes.size() - Prod); + DEBUG(dbgs() << "put, len " << len << " Endspace " << EndSpace << " p " << + Prod << " c " << Cons << "\n"); + // Copy up to the end of the buffer + memcpy(&Bytes[Prod], buf, EndSpace); + // Wrap around if necessary + memcpy(&Bytes[0], buf + EndSpace, len - EndSpace); + Prod = (Prod + len) % Bytes.size(); +} + +// Called with Mutex held to protect Cons, Prod, and Bytes +void QueueStreamer::queueGet(unsigned char *buf, size_t len) { + assert(len <= queueSize()); + size_t EndSpace = std::min(len, Bytes.size() - Cons); + DEBUG(dbgs() << "get, len " << len << " Endspace " << EndSpace << " p " << + Prod << " c " << Cons << "\n"); + // Copy up to the end of the buffer + memcpy(buf, &Bytes[Cons], EndSpace); + // Wrap around if necessary + memcpy(buf + EndSpace, &Bytes[0], len - EndSpace); + Cons = (Cons + len) % Bytes.size(); +} + +llvm::DataStreamer *SRPCStreamer::init(void *(*Callback)(void *), void *arg, + std::string *ErrMsg) { + int err = pthread_create(&CompileThread, NULL, Callback, arg); + if (err) { + if (ErrMsg) *ErrMsg = std::string(strerror(errno)); + return NULL; + } + return &Q; +} + +size_t SRPCStreamer::gotChunk(unsigned char *bytes, size_t len) { + if (Error) return 0; + return Q.PutBytes(bytes, len); +} + +int SRPCStreamer::streamEnd(std::string *ErrMsg) { + Q.SetDone(); + int err = pthread_join(CompileThread, NULL); + if (err) { + if (ErrMsg) *ErrMsg = std::string(strerror(errno)); + return err; + } + if (Error && ErrMsg) *ErrMsg = std::string("compile failed."); + return Error; +} + +#endif diff --git a/tools/pnacl-llc/SRPCStreamer.h b/tools/pnacl-llc/SRPCStreamer.h new file mode 100644 index 0000000000..4c1c6737e6 --- /dev/null +++ b/tools/pnacl-llc/SRPCStreamer.h @@ -0,0 +1,117 @@ +//===-- SRPCStreamer.cpp - Stream bitcode over SRPC 
----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SRPCSTREAMER_H
+#define SRPCSTREAMER_H
+
+#include <pthread.h>
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <vector>
+#include "llvm/Support/DataStream.h"
+
+// Implements LLVM's interface for fetching data from a stream source.
+// Bitcode bytes from the RPC thread are placed here with PutBytes and buffered
+// until the bitcode reader calls GetBytes to remove them.
+// The blocking behavior of GetBytes and PutBytes means that if the
+// compilation happens faster than the bytes come in from the browser, the
+// whole pipeline can block waiting for the RPC thread to put more bytes.
+
+class QueueStreamer : public llvm::DataStreamer {
+ public:
+  QueueStreamer() : Done(false), Prod(0), Cons(0) {
+    pthread_mutex_init(&Mutex, NULL);
+    pthread_cond_init(&Cond, NULL);
+    Bytes.resize(64 * 1024);
+  }
+
+  // Called by the compilation thread. Copy len bytes from the queue into
+  // buf. If there are fewer than len bytes available, copy as many as
+  // there are, signal the RPC thread, and block to wait for the rest.
+  // If all bytes have been received from the browser and there are
+  // fewer than len bytes available, copy all remaining bytes.
+  // Return the number of bytes copied.
+  virtual size_t GetBytes(unsigned char *buf, size_t len);
+
+  // Called by the RPC thread. Copy len bytes from buf into the queue.
+  // If there is not enough space in the queue, copy as many bytes as
+  // will fit, signal the compilation thread, and block until there is
+  // enough space for the rest.
+  // Return the number of bytes copied.
+  size_t PutBytes(unsigned char *buf, size_t len);
+
+  // Called by the RPC thread. Signal that all bytes have been received,
+  // so the last call to GetBytes will return the remaining bytes rather
+  // than waiting for the entire requested amount.
+  void SetDone();
+
+ private:
+  bool Done;
+  pthread_mutex_t Mutex;
+  pthread_cond_t Cond;
+  // Maximum size of the queue. The limitation on the queue size means that
+  // if the compilation happens slower than bytes arrive from the network,
+  // the queue will fill up, the RPC thread will be blocked most of the time,
+  // the RPC thread on the browser side will be waiting for the SRPC to return,
+  // and the buffer on the browser side will grow unboundedly until the
+  // whole bitcode file arrives (which is better than having the queue on
+  // the untrusted side consume all that memory).
+  // The partial-copying behavior of GetBytes and PutBytes prevents deadlock
+  // even if the requested number of bytes is greater than the size limit
+  // (although it will of course be less efficient).
+  // The initial size of the queue is expected to be smaller than this, but
+  // if not, it will simply never be resized.
+  const static size_t queuesize_limit_ = 256 * 1024;
+
+  // Variables and functions to manage the circular queue
+  std::vector<unsigned char> Bytes;
+  size_t Prod; // Queue producer index
+  size_t Cons; // Queue consumer index
+  size_t queueSize() {
+    return Prod >= Cons ? Prod - Cons : Bytes.size() - (Cons - Prod);
+  }
+  size_t capacityRemaining() {
+    return (Prod >= Cons ?
Bytes.size() - (Prod - Cons) : (Cons - Prod)) - 1; + } + void queueResize(); + void queuePut(unsigned char *buf, size_t len); + void queueGet(unsigned char *buf, size_t len); +}; + +// Class to manage the compliation thread and serve as the interface from +// the SRPC thread +class SRPCStreamer { +public: + SRPCStreamer() : Error(false) {} + // Initialize streamer, create a new thread running Callback, and + // return a pointer to the DataStreamer the threads will use to + // synchronize. On error, return NULL and fill in the ErrorMsg string + llvm::DataStreamer *init(void *(*Callback)(void *), + void *arg, std::string *ErrMsg); + // Called by the RPC thread. Copy len bytes from buf. Return bytes copied. + size_t gotChunk(unsigned char *bytes, size_t len); + // Called by the RPC thread. Wait for the compilation thread to finish. + int streamEnd(std::string *ErrMsg); + // Called by the compilation thread. Signal that there was a compilation + // error so the RPC thread can abort the stream. + void setError() { Error = true; } +private: + bool Error; + QueueStreamer Q; + pthread_t CompileThread; +}; + + + +#endif // SRPCSTREAMER_H diff --git a/tools/pnacl-llc/nacl_file.cpp b/tools/pnacl-llc/nacl_file.cpp new file mode 100644 index 0000000000..8ae0399476 --- /dev/null +++ b/tools/pnacl-llc/nacl_file.cpp @@ -0,0 +1,399 @@ +/* Copyright 2012 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + * + * This file provides wrappers to open() to use pre-opened file descriptors + * for the input bitcode and the output file. + * + * It also has the SRPC interfaces, but that should probably be refactored + * into a separate file. + */ + +#if defined(__native_client__) + +#include <argz.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +// Headers which are not properly part of the SDK are included by their +// path in the nacl tree +#include "native_client/src/shared/srpc/nacl_srpc.h" +#ifdef __pnacl__ +#include "native_client/src/untrusted/nacl/pnacl.h" +#endif +#include "SRPCStreamer.h" + +#include <string> +#include <map> +#include <vector> + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/system_error.h" + + +using llvm::MemoryBuffer; +using llvm::StringRef; +using std::string; +using std::map; + +#define printerr(...) fprintf(stderr, __VA_ARGS__) +// printdbg is currently disabled to reduce spew. +#define printdbg(...) + +#define ARRAY_SIZE(array) (sizeof array / sizeof array[0]) + +namespace { + +typedef std::vector<std::string> string_vector; + +// The filename used internally for looking up the bitcode file. +char kBitcodeFilename[] = "pnacl.pexe"; +// The filename used internally for looking up the object code file. +char kObjectFilename[] = "pnacl.o"; +// Object which manages streaming bitcode over SRPC and threading. +SRPCStreamer *srpc_streamer; +// FD of the object file. +int object_file_fd; + +} // namespace + +//TODO(dschuff): a little more elegant interface into llc than this? +extern llvm::DataStreamer* NaClBitcodeStreamer; + +extern int llc_main(int argc, char **argv); + +int GetObjectFileFD() { + return object_file_fd; +} + +namespace { + +int DoTranslate(string_vector* cmd_line_vec, int object_fd) { + if (cmd_line_vec == NULL) { + return 1; + } + object_file_fd = object_fd; + // Make an argv array from the input vector. 
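+  // llc_main expects a NULL-terminated argv, so the array gets one extra slot.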
+  size_t argc = cmd_line_vec->size();
+  char** argv = new char*[argc + 1];
+  for (size_t i = 0; i < argc; ++i) {
+    // llc_main will not mutate the command line, so this is safe.
+    argv[i] = const_cast<char*>((*cmd_line_vec)[i].c_str());
+  }
+  argv[argc] = NULL;
+  // Call main.
+  return llc_main(static_cast<int>(argc), argv);
+}
+
+string_vector* CommandLineFromArgz(char* str, size_t str_len) {
+  char* entry = str;
+  string_vector* vec = new string_vector;
+  while (entry != NULL) {
+    vec->push_back(entry);
+    entry = argz_next(str, str_len, entry);
+  }
+  return vec;
+}
+
+void AddFixedArguments(string_vector* vec) {
+  // Add fixed arguments to the command line. These specify the bitcode
+  // and object code filenames, removing them from the contract with the
+  // coordinator.
+  vec->push_back(kBitcodeFilename);
+  vec->push_back("-o");
+  vec->push_back(kObjectFilename);
+}
+
+bool AddDefaultCPU(string_vector* vec) {
+#if defined (__pnacl__)
+  switch (__builtin_nacl_target_arch()) {
+    case PnaclTargetArchitectureX86_32: {
+      vec->push_back("-mcpu=pentium4");
+      break;
+    }
+    case PnaclTargetArchitectureX86_64: {
+      vec->push_back("-mcpu=core2");
+      break;
+    }
+    case PnaclTargetArchitectureARM_32: {
+      vec->push_back("-mcpu=cortex-a9");
+      break;
+    }
+    default:
+      printerr("no target architecture match.\n");
+      return false;
+  }
+// Some cases for building this with nacl-gcc.
+#elif defined (__i386__)
+  vec->push_back("-mcpu=pentium4");
+#elif defined (__x86_64__)
+  vec->push_back("-mcpu=core2");
+#elif defined (__arm__)
+  vec->push_back("-mcpu=cortex-a9");
+#else
+#error "Unknown architecture"
+#endif
+  return true;
+}
+
+bool HasCPUOverride(string_vector* vec) {
+  std::string mcpu = std::string("-mcpu=");
+  size_t len = mcpu.length();
+  for (size_t i = 0; i < vec->size(); ++i) {
+    std::string prefix = (*vec)[i].substr(0, len);
+    if (prefix.compare(mcpu) == 0)
+      return true;
+  }
+  return false;
+}
+
+string_vector* GetDefaultCommandLine() {
+  string_vector* command_line = new string_vector;
+  size_t i;
+  // First, those common to all architectures.
+  static const char* common_args[] = { "pnacl_translator",
+                                       "-filetype=obj" };
+  for (i = 0; i < ARRAY_SIZE(common_args); ++i) {
+    command_line->push_back(common_args[i]);
+  }
+  // Then those particular to a platform.
+  static const char* llc_args_x8632[] = { "-mtriple=i686-none-nacl-gnu",
+                                          NULL };
+  static const char* llc_args_x8664[] = { "-mtriple=x86_64-none-nacl-gnu",
+                                          NULL };
+  static const char* llc_args_arm[] = { "-mtriple=armv7a-none-nacl-gnueabi",
+                                        "-arm-reserve-r9",
+                                        "-sfi-disable-cp",
+                                        "-sfi-store",
+                                        "-sfi-load",
+                                        "-sfi-stack",
+                                        "-sfi-branch",
+                                        "-sfi-data",
+                                        "-mattr=+neon",
+                                        "-no-inline-jumptables",
+                                        "-float-abi=hard",
+                                        NULL };
+
+  const char **llc_args = NULL;
+#if defined (__pnacl__)
+  switch (__builtin_nacl_target_arch()) {
+    case PnaclTargetArchitectureX86_32: {
+      llc_args = llc_args_x8632;
+      break;
+    }
+    case PnaclTargetArchitectureX86_64: {
+      llc_args = llc_args_x8664;
+      break;
+    }
+    case PnaclTargetArchitectureARM_32: {
+      llc_args = llc_args_arm;
+      break;
+    }
+    default:
+      printerr("no target architecture match.\n");
+      delete command_line;
+      return NULL;
+  }
+// Some cases for building this with nacl-gcc.
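+// (i.e. when the translator is built natively with nacl-gcc instead of as a pexe)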
+#elif defined (__i386__) + llc_args = llc_args_x8632; +#elif defined (__x86_64__) + llc_args = llc_args_x8664; +#elif defined (__arm__) + llc_args = llc_args_arm; +#else +#error "Unknown architecture" +#endif + for (i = 0; llc_args[i] != NULL; i++) command_line->push_back(llc_args[i]); + return command_line; +} + +// Data passed from main thread to compile thread. +// Takes ownership of the commandline vector. +class StreamingThreadData { + public: + StreamingThreadData(int object_fd, string_vector* cmd_line_vec) : + object_fd_(object_fd), cmd_line_vec_(cmd_line_vec) {} + int ObjectFD() const { return object_fd_; } + string_vector* CmdLineVec() const { return cmd_line_vec_.get(); } + const int object_fd_; + const llvm::OwningPtr<string_vector> cmd_line_vec_; +}; + +void *run_streamed(void *arg) { + StreamingThreadData* data = reinterpret_cast<StreamingThreadData*>(arg); + data->CmdLineVec()->push_back("-streaming-bitcode"); + if (DoTranslate(data->CmdLineVec(), data->ObjectFD()) != 0) { + printerr("DoTranslate failed.\n"); + srpc_streamer->setError(); + return NULL; + } + delete data; + return NULL; +} + +// Actually do the work for stream initialization. +void do_stream_init(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done, + string_vector* command_line_vec) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + srpc_streamer = new SRPCStreamer(); + std::string StrError; + StreamingThreadData* thread_data = new StreamingThreadData( + in_args[0]->u.hval, command_line_vec); + NaClBitcodeStreamer = srpc_streamer->init(run_streamed, + reinterpret_cast<void *>(thread_data), + &StrError); + if (NaClBitcodeStreamer) { + rpc->result = NACL_SRPC_RESULT_OK; + out_args[0]->arrays.str = strdup("no error"); + } else { + out_args[0]->arrays.str = strdup(StrError.c_str()); + } +} + +// Invoked by the StreamInit RPC to initialize bitcode streaming over SRPC. +// Under the hood it forks a new thread at starts the llc_main, which sets +// up the compilation and blocks when it tries to start reading the bitcode. +// Input arg is a file descriptor to write the output object file to. +// Returns a string, containing an error message if the call fails. +void stream_init(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + // cmd_line_vec allocated by GetDefaultCommandLine() is freed by the + // translation thread in run_streamed() + string_vector* cmd_line_vec = GetDefaultCommandLine(); + if (!cmd_line_vec || !AddDefaultCPU(cmd_line_vec)) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + out_args[0]->arrays.str = strdup("Failed to get default commandline."); + return; + } + AddFixedArguments(cmd_line_vec); + do_stream_init(rpc, in_args, out_args, done, cmd_line_vec); +} + +// Invoked by StreamInitWithCommandLine RPC. Same as stream_init, but +// provides a command line to use instead of the default. +void stream_init_with_command_line(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + char* command_line = in_args[1]->arrays.carr; + size_t command_line_len = in_args[1]->u.count; + string_vector* cmd_line_vec = + CommandLineFromArgz(command_line, command_line_len); + AddFixedArguments(cmd_line_vec); + // cmd_line_vec is freed by the translation thread in run_streamed + do_stream_init(rpc, in_args, out_args, done, cmd_line_vec); +} + +// Invoked by StreamInitWithOverrides RPC. 
Same as stream_init, but +// provides commandline flag overrides (appended to the default). +void stream_init_with_overrides(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + string_vector* cmd_line_vec = GetDefaultCommandLine(); + if (!cmd_line_vec) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + out_args[0]->arrays.str = strdup("Failed to get default commandline."); + return; + } + AddFixedArguments(cmd_line_vec); + + char* command_line = in_args[1]->arrays.carr; + size_t command_line_len = in_args[1]->u.count; + llvm::OwningPtr<string_vector> extra_vec( + CommandLineFromArgz(command_line, command_line_len)); + cmd_line_vec->insert(cmd_line_vec->end(), + extra_vec->begin(), extra_vec->end()); + // Make sure some -mcpu override exists for now to prevent + // auto-cpu feature detection from triggering instructions that + // we do not validate yet. + if (!HasCPUOverride(extra_vec.get())) { + AddDefaultCPU(cmd_line_vec); + } + extra_vec.reset(NULL); + // cmd_line_vec is freed by the translation thread in run_streamed. + do_stream_init(rpc, in_args, out_args, done, cmd_line_vec); +} + +// Invoked by the StreamChunk RPC. Receives a chunk of the bitcode and +// buffers it for later retrieval by the compilation thread. +void stream_chunk(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + size_t len = in_args[0]->u.count; + unsigned char *bytes = reinterpret_cast<unsigned char*>( + in_args[0]->arrays.carr); + if (srpc_streamer->gotChunk(bytes, len) != len) { + return; + } + rpc->result = NACL_SRPC_RESULT_OK; +} + +// Invoked by the StreamEnd RPC. Waits until the compilation finishes, +// then returns. Returns an int indicating whether the bitcode is a +// shared library, a string with the soname, a string with dependencies, +// and a string which contains an error message if applicable. +void stream_end(NaClSrpcRpc *rpc, + NaClSrpcArg **in_args, + NaClSrpcArg **out_args, + NaClSrpcClosure *done) { + NaClSrpcClosureRunner runner(done); + rpc->result = NACL_SRPC_RESULT_APP_ERROR; + std::string StrError; + if (srpc_streamer->streamEnd(&StrError)) { + out_args[3]->arrays.str = strdup(StrError.c_str()); + return; + } + // TODO(eliben): We don't really use shared libraries now. At some + // point this should be cleaned up from SRPC as well. + out_args[0]->u.ival = false; + // SRPC deletes the strings returned when the closure is invoked. + // Therefore we need to use strdup. 
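+  // (out_args[1] is the soname and out_args[2] the dependency list; both are unused now.)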
+ out_args[1]->arrays.str = strdup(""); + out_args[2]->arrays.str = strdup(""); + rpc->result = NACL_SRPC_RESULT_OK; +} + +const struct NaClSrpcHandlerDesc srpc_methods[] = { + // Protocol for streaming: + // (StreamInit(obj_fd) -> error_str | + // StreamInitWIthCommandLine(obj_fd, escaped_cmdline) -> error_str) + // StreamChunk(data) + + // StreamEnd() -> (is_shared_lib,soname,dependencies,error_str) + { "StreamInit:h:s", stream_init }, + { "StreamInitWithCommandLine:hC:s:", stream_init_with_command_line }, + { "StreamInitWithOverrides:hC:s:", stream_init_with_overrides }, + { "StreamChunk:C:", stream_chunk }, + { "StreamEnd::isss", stream_end }, + { NULL, NULL }, +}; + +} // namespace + +int +main() { + if (!NaClSrpcModuleInit()) { + return 1; + } + + if (!NaClSrpcAcceptClientConnection(srpc_methods)) { + return 1; + } + NaClSrpcModuleFini(); + return 0; +} + +#endif diff --git a/tools/pnacl-llc/pnacl-llc.cpp b/tools/pnacl-llc/pnacl-llc.cpp new file mode 100644 index 0000000000..6292001ab1 --- /dev/null +++ b/tools/pnacl-llc/pnacl-llc.cpp @@ -0,0 +1,563 @@ +//===-- pnacl-llc.cpp - PNaCl-specific llc: pexe ---> nexe ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// pnacl-llc: the core of the PNaCl translator, compiling a pexe into a nexe. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" +#include "llvm/Analysis/NaCl.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Support/DataStream.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "llvm/CodeGen/CommandFlags.h" +#include "llvm/CodeGen/LinkAllAsmWriterComponents.h" +#include "llvm/CodeGen/LinkAllCodegenComponents.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Pass.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/IRReader.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/NaCl.h" +#include <memory> + + +using namespace llvm; + +// NOTE: When __native_client__ is defined it means pnacl-llc is built as a +// sandboxed translator (from pnacl-llc.pexe to pnacl-llc.nexe). In this mode +// it uses SRPC operations instead of direct OS intefaces. 
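+// In the sandboxed build, main() lives in nacl_file.cpp; it sets up the SRPC
+// connection, fills in NaClBitcodeStreamer, and exposes the output object-file
+// descriptor through GetObjectFileFD() before invoking llc_main().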
+#if defined(__native_client__) +int GetObjectFileFD(); +DataStreamer* NaClBitcodeStreamer; +#endif + +const char *TimeIRParsingGroupName = "LLVM IR Parsing"; +const char *TimeIRParsingName = "Parse IR"; + +bool TimeIRParsingIsEnabled = false; +static cl::opt<bool,true> +EnableTimeIRParsing("time-ir-parsing", cl::location(TimeIRParsingIsEnabled), + cl::desc("Measure the time IR parsing takes")); + +cl::opt<NaClFileFormat> +InputFileFormat( + "bitcode-format", + cl::desc("Define format of input file:"), + cl::values( + clEnumValN(LLVMFormat, "llvm", "LLVM file (default)"), + clEnumValN(PNaClFormat, "pnacl", "PNaCl bitcode file"), + clEnumValEnd), + cl::init(LLVMFormat)); + +// General options for llc. Other pass-specific options are specified +// within the corresponding llc passes, and target-specific options +// and back-end code generation options are specified with the target machine. +// +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); + +static cl::opt<unsigned> +TimeCompilations("time-compilations", cl::Hidden, cl::init(1u), + cl::value_desc("N"), + cl::desc("Repeat compilation N times for timing")); + +// Using bitcode streaming has a couple of ramifications. Primarily it means +// that the module in the file will be compiled one function at a time rather +// than the whole module. This allows earlier functions to be compiled before +// later functions are read from the bitcode but of course means no whole-module +// optimizations. For now, streaming is only supported for files and stdin. +static cl::opt<bool> +LazyBitcode("streaming-bitcode", + cl::desc("Use lazy bitcode streaming for file inputs"), + cl::init(false)); + +// The option below overlaps very much with bitcode streaming. +// We keep it separate because it is still experimental and we want +// to use it without changing the outside behavior which is especially +// relevant for the sandboxed case. +static cl::opt<bool> +ReduceMemoryFootprint("reduce-memory-footprint", + cl::desc("Aggressively reduce memory used by llc"), + cl::init(false)); + +static cl::opt<bool> +PNaClABIVerify("pnaclabi-verify", + cl::desc("Verify PNaCl bitcode ABI before translating"), + cl::init(false)); +static cl::opt<bool> +PNaClABIVerifyFatalErrors("pnaclabi-verify-fatal-errors", + cl::desc("PNaCl ABI verification errors are fatal"), + cl::init(false)); + +// Determine optimization level. +static cl::opt<char> +OptLevel("O", + cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " + "(default = '-O2')"), + cl::Prefix, + cl::ZeroOrMore, + cl::init(' ')); + +static cl::opt<std::string> +UserDefinedTriple("mtriple", cl::desc("Set target triple")); + +cl::opt<bool> NoVerify("disable-verify", cl::Hidden, + cl::desc("Do not verify input module")); + +cl::opt<bool> +DisableSimplifyLibCalls("disable-simplify-libcalls", + cl::desc("Disable simplify-libcalls"), + cl::init(false)); + +static int compileModule(char**, LLVMContext&); + +// GetFileNameRoot - Helper function to get the basename of a filename. +static inline std::string +GetFileNameRoot(const std::string &InputFilename) { + std::string IFN = InputFilename; + std::string outputFilename; + int Len = IFN.length(); + if ((Len > 2) && + IFN[Len-3] == '.' 
&& + ((IFN[Len-2] == 'b' && IFN[Len-1] == 'c') || + (IFN[Len-2] == 'l' && IFN[Len-1] == 'l'))) { + outputFilename = std::string(IFN.begin(), IFN.end()-3); // s/.bc/.s/ + } else { + outputFilename = IFN; + } + return outputFilename; +} + +static tool_output_file *GetOutputStream(const char *TargetName, + Triple::OSType OS, + const char *ProgName) { + // If we don't yet have an output filename, make one. + if (OutputFilename.empty()) { + if (InputFilename == "-") + OutputFilename = "-"; + else { + OutputFilename = GetFileNameRoot(InputFilename); + + switch (FileType) { + case TargetMachine::CGFT_AssemblyFile: + if (TargetName[0] == 'c') { + if (TargetName[1] == 0) + OutputFilename += ".cbe.c"; + else if (TargetName[1] == 'p' && TargetName[2] == 'p') + OutputFilename += ".cpp"; + else + OutputFilename += ".s"; + } else + OutputFilename += ".s"; + break; + case TargetMachine::CGFT_ObjectFile: + if (OS == Triple::Win32) + OutputFilename += ".obj"; + else + OutputFilename += ".o"; + break; + case TargetMachine::CGFT_Null: + OutputFilename += ".null"; + break; + } + } + } + + // Decide if we need "binary" output. + bool Binary = false; + switch (FileType) { + case TargetMachine::CGFT_AssemblyFile: + break; + case TargetMachine::CGFT_ObjectFile: + case TargetMachine::CGFT_Null: + Binary = true; + break; + } + + // Open the file. + std::string error; + unsigned OpenFlags = 0; + if (Binary) OpenFlags |= raw_fd_ostream::F_Binary; + OwningPtr<tool_output_file> FDOut( + new tool_output_file(OutputFilename.c_str(), error, OpenFlags)); + if (!error.empty()) { + errs() << error << '\n'; + return 0; + } + + return FDOut.take(); +} + +// main - Entry point for the llc compiler. +// +int llc_main(int argc, char **argv) { + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + + // Enable debug stream buffering. + EnableDebugBuffering = true; + + LLVMContext &Context = getGlobalContext(); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + + // Initialize targets first, so that --version shows registered targets. + InitializeAllTargets(); + InitializeAllTargetMCs(); + InitializeAllAsmPrinters(); +#if !defined(__native_client__) + // Prune asm parsing from sandboxed translator. + // Do not prune "AsmPrinters" because that includes + // the direct object emission. + InitializeAllAsmParsers(); +#endif + + // Initialize codegen and IR passes used by llc so that the -print-after, + // -print-before, and -stop-after options work. + PassRegistry *Registry = PassRegistry::getPassRegistry(); + initializeCore(*Registry); + initializeCodeGen(*Registry); + initializeLoopStrengthReducePass(*Registry); + initializeLowerIntrinsicsPass(*Registry); + initializeUnreachableBlockElimPass(*Registry); + + // Register the target printer for --version. + cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); + + // Enable the PNaCl ABI verifier by default in sandboxed mode. +#if defined(__native_client__) + PNaClABIVerify = true; + PNaClABIVerifyFatalErrors = true; +#endif + + cl::ParseCommandLineOptions(argc, argv, "pnacl-llc\n"); + + // Compile the module TimeCompilations times to give better compile time + // metrics. + for (unsigned I = TimeCompilations; I; --I) + if (int RetVal = compileModule(argv, Context)) + return RetVal; + return 0; +} + +static void CheckABIVerifyErrors(PNaClABIErrorReporter &Reporter, + const Twine &Name) { + if (PNaClABIVerify && Reporter.getErrorCount() > 0) { + errs() << (PNaClABIVerifyFatalErrors ? 
"ERROR: " : "WARNING: "); + errs() << Name << " is not valid PNaCl bitcode:\n"; + Reporter.printErrors(errs()); + if (PNaClABIVerifyFatalErrors) + exit(1); + } + Reporter.reset(); +} + +static int compileModule(char **argv, LLVMContext &Context) { + // Load the module to be compiled... + SMDiagnostic Err; + std::auto_ptr<Module> M; + Module *mod = 0; + Triple TheTriple; + + PNaClABIErrorReporter ABIErrorReporter; + +#if defined(__native_client__) + if (LazyBitcode) { + std::string StrError; + M.reset(getNaClStreamedBitcodeModule( + std::string("<SRPC stream>"), + NaClBitcodeStreamer, Context, &StrError)); + if (!StrError.empty()) + Err = SMDiagnostic(InputFilename, SourceMgr::DK_Error, StrError); + } else { + llvm_unreachable("native client SRPC only supports streaming"); + } +#else + { + // TODO: after the next merge this can be removed. + // https://code.google.com/p/nativeclient/issues/detail?id=3349 + NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName, + TimeIRParsingIsEnabled); + M.reset(NaClParseIRFile(InputFilename, InputFileFormat, Err, Context)); + } +#endif + + mod = M.get(); + if (mod == 0) { + Err.print(argv[0], errs()); + return 1; + } + + if (PNaClABIVerify) { + // Verify the module (but not the functions yet) + ModulePass *VerifyPass = createPNaClABIVerifyModulePass(&ABIErrorReporter, + LazyBitcode); + VerifyPass->runOnModule(*mod); + CheckABIVerifyErrors(ABIErrorReporter, "Module"); + } + + // Add declarations for external functions required by PNaCl. The + // ResolvePNaClIntrinsics function pass running during streaming + // depends on these declarations being in the module. + OwningPtr<ModulePass> AddPNaClExternalDeclsPass( + createAddPNaClExternalDeclsPass()); + AddPNaClExternalDeclsPass->runOnModule(*mod); + + if (UserDefinedTriple.empty()) { + report_fatal_error("-mtriple must be set to a target triple for pnacl-llc"); + } else { + mod->setTargetTriple(Triple::normalize(UserDefinedTriple)); + TheTriple = Triple(mod->getTargetTriple()); + } + + // Get the target specific parser. 
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple, + Error); + if (!TheTarget) { + errs() << argv[0] << ": " << Error; + return 1; + } + + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + if (MAttrs.size()) { + SubtargetFeatures Features; + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + FeaturesStr = Features.getString(); + } + + CodeGenOpt::Level OLvl = CodeGenOpt::Default; + switch (OptLevel) { + default: + errs() << argv[0] << ": invalid optimization level.\n"; + return 1; + case ' ': break; + case '0': OLvl = CodeGenOpt::None; break; + case '1': OLvl = CodeGenOpt::Less; break; + case '2': OLvl = CodeGenOpt::Default; break; + case '3': OLvl = CodeGenOpt::Aggressive; break; + } + + TargetOptions Options; + Options.LessPreciseFPMADOption = EnableFPMAD; + Options.NoFramePointerElim = DisableFPElim; + Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf; + Options.AllowFPOpFusion = FuseFPOps; + Options.UnsafeFPMath = EnableUnsafeFPMath; + Options.NoInfsFPMath = EnableNoInfsFPMath; + Options.NoNaNsFPMath = EnableNoNaNsFPMath; + Options.HonorSignDependentRoundingFPMathOption = + EnableHonorSignDependentRoundingFPMath; + Options.UseSoftFloat = GenerateSoftFloatCalls; + if (FloatABIForCalls != FloatABI::Default) + Options.FloatABIType = FloatABIForCalls; + Options.NoZerosInBSS = DontPlaceZerosInBSS; + Options.GuaranteedTailCallOpt = EnableGuaranteedTailCallOpt; + Options.DisableTailCalls = DisableTailCalls; + Options.StackAlignmentOverride = OverrideStackAlignment; + Options.RealignStack = EnableRealignStack; + Options.TrapFuncName = TrapFuncName; + Options.PositionIndependentExecutable = EnablePIE; + Options.EnableSegmentedStacks = SegmentedStacks; + Options.UseInitArray = UseInitArray; + Options.SSPBufferSize = SSPBufferSize; + + std::auto_ptr<TargetMachine> + target(TheTarget->createTargetMachine(TheTriple.getTriple(), + MCPU, FeaturesStr, Options, + RelocModel, CMModel, OLvl)); + assert(target.get() && "Could not allocate target machine!"); + assert(mod && "Should have exited after outputting help!"); + TargetMachine &Target = *target.get(); + + if (DisableDotLoc) + Target.setMCUseLoc(false); + + if (DisableCFI) + Target.setMCUseCFI(false); + + if (EnableDwarfDirectory) + Target.setMCUseDwarfDirectory(true); + + if (GenerateSoftFloatCalls) + FloatABIForCalls = FloatABI::Soft; + + // Disable .loc support for older OS X versions. + if (TheTriple.isMacOSX() && + TheTriple.isMacOSXVersionLT(10, 6)) + Target.setMCUseLoc(false); + +#if !defined(__native_client__) + // Figure out where we are going to send the output. + OwningPtr<tool_output_file> Out + (GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0])); + if (!Out) return 1; +#endif + + // Build up all of the passes that we want to do to the module. + OwningPtr<PassManagerBase> PM; + if (LazyBitcode || ReduceMemoryFootprint) + PM.reset(new FunctionPassManager(mod)); + else + PM.reset(new PassManager()); + + // Add the ABI verifier pass before the analysis and code emission passes. + FunctionPass *FunctionVerifyPass = NULL; + if (PNaClABIVerify) { + FunctionVerifyPass = createPNaClABIVerifyFunctionsPass(&ABIErrorReporter); + PM->add(FunctionVerifyPass); + } + + // Add the intrinsic resolution pass. It assumes ABI-conformant code. + PM->add(createResolvePNaClIntrinsicsPass()); + + // Add an appropriate TargetLibraryInfo pass for the module's triple. 
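+  // The pass manager takes ownership of the TargetLibraryInfo pass added here.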
+ TargetLibraryInfo *TLI = new TargetLibraryInfo(TheTriple); + if (DisableSimplifyLibCalls) + TLI->disableAllFunctions(); + PM->add(TLI); + + // Add intenal analysis passes from the target machine. + Target.addAnalysisPasses(*PM.get()); + + // Add the target data from the target machine, if it exists, or the module. + if (const DataLayout *TD = Target.getDataLayout()) + PM->add(new DataLayout(*TD)); + else + PM->add(new DataLayout(mod)); + + // Override default to generate verbose assembly. + Target.setAsmVerbosityDefault(true); + + if (RelaxAll) { + if (FileType != TargetMachine::CGFT_ObjectFile) + errs() << argv[0] + << ": warning: ignoring -mc-relax-all because filetype != obj"; + else + Target.setMCRelaxAll(true); + } + +#if defined __native_client__ + { + raw_fd_ostream ROS(GetObjectFileFD(), true); + ROS.SetBufferSize(1 << 20); + formatted_raw_ostream FOS(ROS); + + // Ask the target to add backend passes as necessary. + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify)) { + errs() << argv[0] << ": target does not support generation of this" + << " file type!\n"; + return 1; + } + + if (LazyBitcode || ReduceMemoryFootprint) { + FunctionPassManager* P = static_cast<FunctionPassManager*>(PM.get()); + P->doInitialization(); + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { + P->run(*I); + CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName()); + if (ReduceMemoryFootprint) { + I->Dematerialize(); + } + } + P->doFinalization(); + } else { + static_cast<PassManager*>(PM.get())->run(*mod); + } + FOS.flush(); + ROS.flush(); + } +#else + + { + formatted_raw_ostream FOS(Out->os()); + + AnalysisID StartAfterID = 0; + AnalysisID StopAfterID = 0; + const PassRegistry *PR = PassRegistry::getPassRegistry(); + if (!StartAfter.empty()) { + const PassInfo *PI = PR->getPassInfo(StartAfter); + if (!PI) { + errs() << argv[0] << ": start-after pass is not registered.\n"; + return 1; + } + StartAfterID = PI->getTypeInfo(); + } + if (!StopAfter.empty()) { + const PassInfo *PI = PR->getPassInfo(StopAfter); + if (!PI) { + errs() << argv[0] << ": stop-after pass is not registered.\n"; + return 1; + } + StopAfterID = PI->getTypeInfo(); + } + + // Ask the target to add backend passes as necessary. + if (Target.addPassesToEmitFile(*PM, FOS, FileType, NoVerify, + StartAfterID, StopAfterID)) { + errs() << argv[0] << ": target does not support generation of this" + << " file type!\n"; + return 1; + } + + // Before executing passes, print the final values of the LLVM options. + cl::PrintOptionValues(); + + if (LazyBitcode || ReduceMemoryFootprint) { + FunctionPassManager *P = static_cast<FunctionPassManager*>(PM.get()); + P->doInitialization(); + for (Module::iterator I = mod->begin(), E = mod->end(); I != E; ++I) { + P->run(*I); + CheckABIVerifyErrors(ABIErrorReporter, "Function " + I->getName()); + if (ReduceMemoryFootprint) { + I->Dematerialize(); + } + } + P->doFinalization(); + } else { + static_cast<PassManager*>(PM.get())->run(*mod); + } + } + + // Declare success. + Out->keep(); +#endif + + return 0; +} + +#if !defined(__native_client__) +int +main (int argc, char **argv) { + return llc_main(argc, argv); +} +#else +// main() is in nacl_file.cpp. 
+#endif diff --git a/tools/pnacl-thaw/CMakeLists.txt b/tools/pnacl-thaw/CMakeLists.txt new file mode 100644 index 0000000000..91b818efe6 --- /dev/null +++ b/tools/pnacl-thaw/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitwriter naclbitreader) + +add_llvm_tool(pnacl-thaw + pnacl-thaw.cpp + ) diff --git a/tools/pnacl-thaw/LLVMBuild.txt b/tools/pnacl-thaw/LLVMBuild.txt new file mode 100644 index 0000000000..864da2cbd5 --- /dev/null +++ b/tools/pnacl-thaw/LLVMBuild.txt @@ -0,0 +1,16 @@ +;===- ./tools/pnacl-thaw/LLVMBuild.txt -----------------------*- Conf -*--===; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pnacl-thaw +parent = Tools +required_libraries = BitWriter NaClBitReader diff --git a/tools/pnacl-thaw/Makefile b/tools/pnacl-thaw/Makefile new file mode 100644 index 0000000000..8e7699e185 --- /dev/null +++ b/tools/pnacl-thaw/Makefile @@ -0,0 +1,17 @@ +##===- tools/pnacl-thaw/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pnacl-thaw +LINK_COMPONENTS := bitwriter naclbitreader + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/pnacl-thaw/pnacl-thaw.cpp b/tools/pnacl-thaw/pnacl-thaw.cpp new file mode 100644 index 0000000000..7f27bd58fc --- /dev/null +++ b/tools/pnacl-thaw/pnacl-thaw.cpp @@ -0,0 +1,96 @@ +/* Copyright 2013 The Native Client Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can + * be found in the LICENSE file. + */ + +//===-- pnacl-thaw.cpp - The low-level NaCl bitcode thawer ----------------===// +// +//===----------------------------------------------------------------------===// +// +// Converts NaCl wire format back to LLVM bitcode. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/LLVMContext.h" +// Note: We need the following to provide the API for calling the NaCl +// Bitcode Reader to read the frozen file. +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +// Note: We need the following to provide the API for calling the (LLVM) +// Bitcode Writer to generate the corresponding LLVM bitcode file. 
+#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DataStream.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/ToolOutputFile.h" + +using namespace llvm; + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Specify thawed pexe filename"), + cl::value_desc("filename"), cl::init("-")); + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<frozen file>"), cl::init("-")); + +static void WriteOutputFile(const Module *M) { + + std::string ErrorInfo; + OwningPtr<tool_output_file> Out + (new tool_output_file(OutputFilename.c_str(), ErrorInfo, + raw_fd_ostream::F_Binary)); + if (!ErrorInfo.empty()) { + errs() << ErrorInfo << '\n'; + exit(1); + } + + WriteBitcodeToFile(M, Out->os()); + + // Declare success. + Out->keep(); +} + +int main(int argc, char **argv) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + + LLVMContext &Context = getGlobalContext(); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. + + cl::ParseCommandLineOptions( + argc, argv, "Converts NaCl pexe wire format into LLVM bitcode format\n"); + + std::string ErrorMessage; + std::auto_ptr<Module> M; + + // Use the bitcode streaming interface + DataStreamer *streamer = getDataFileStreamer(InputFilename, &ErrorMessage); + if (streamer) { + std::string DisplayFilename; + if (InputFilename == "-") + DisplayFilename = "<stdin>"; + else + DisplayFilename = InputFilename; + M.reset(getNaClStreamedBitcodeModule( + DisplayFilename, streamer, Context, + &ErrorMessage, /*AcceptSupportedOnly=*/false)); + if(M.get() != 0 && M->MaterializeAllPermanently(&ErrorMessage)) { + M.reset(); + } + } + + if (M.get() == 0) { + errs() << argv[0] << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "bitcode didn't read correctly.\n"; + return 1; + } + + WriteOutputFile(M.get()); + return 0; +} diff --git a/tools/pso-stub/CMakeLists.txt b/tools/pso-stub/CMakeLists.txt new file mode 100644 index 0000000000..4b2f779cb0 --- /dev/null +++ b/tools/pso-stub/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS bitreader bitwriter object support analysis) + +add_llvm_tool(pso-stub + pso-stub.cpp + ) diff --git a/tools/pso-stub/LLVMBuild.txt b/tools/pso-stub/LLVMBuild.txt new file mode 100644 index 0000000000..e643053dbf --- /dev/null +++ b/tools/pso-stub/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/pso-stub/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = pso-stub +parent = Tools +required_libraries = BitReader BitWriter Object Support Analysis diff --git a/tools/pso-stub/Makefile b/tools/pso-stub/Makefile new file mode 100644 index 0000000000..c2860e65f6 --- /dev/null +++ b/tools/pso-stub/Makefile @@ -0,0 +1,18 @@ +##===- tools/pso-stub/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +TOOLNAME := pso-stub +LINK_COMPONENTS := bitreader bitwriter object support analysis + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS := 1 + +include $(LEVEL)/Makefile.common + diff --git a/tools/pso-stub/pso-stub.cpp b/tools/pso-stub/pso-stub.cpp new file mode 100644 index 0000000000..26ce0c5056 --- /dev/null +++ b/tools/pso-stub/pso-stub.cpp @@ -0,0 +1,309 @@ +/*===- pso-stub.c - Create bitcode shared object stubs -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Create a bitcode stub for a native shared object. +// Usage: pso-stub <input.so> -o <output.pso> +// +// The stub bitcode file contains the same dynamic symbols as the input shared +// object, with identical attributes (e.g. weak, undefined, TLS). +// +// Undefined functions become declarations in the bitcode. +// Undefined variables become external variable declarations in the bitcode. +// Defined functions become trivial stub functions in the bitcode (which do +// nothing but "ret void"). +// Defined object/tls symbols became dummy variable definitions (int foo = 0). +// +// The generated bitcode is suitable for linking against (as a shared object), +// but nothing else. +// +// TODO(pdox): Implement GNU symbol versioning. +// TODO(pdox): Mark IFUNC symbols as functions, and store +// this attribute as metadata. 
+//===----------------------------------------------------------------------===*/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/ELF.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/APInt.h" + +using namespace llvm; +using namespace llvm::object; + +namespace { + +cl::opt<std::string> +InputFilename(cl::Positional, cl::desc("<input native shared object>"), + cl::init("")); + +cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); + +// Variables / declarations to place in llvm.used array. +std::vector<GlobalValue*> LLVMUsed; + +void AddUsedGlobal(GlobalValue *GV) { + // Clang normally asserts that these are not decls. We do need + // decls to survive though, and those are really the ones we + // worry about, so only add those. + // We run verifyModule() below, so that we know this is somewhat valid. + if (GV->isDeclaration()) { + LLVMUsed.push_back(GV); + } +} + +// Emit llvm.used array. +// This is almost exactly like clang/lib/CodeGen/CodeGenModule.cpp::EmitLLVMUsed +void EmitLLVMUsed(Module *M) { + // Don't create llvm.used if there is no need. + if (LLVMUsed.empty()) + return; + + Type *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); + // Convert LLVMUsed to what ConstantArray needs. + SmallVector<llvm::Constant*, 8> UsedArray; + UsedArray.resize(LLVMUsed.size()); + for (unsigned i = 0, e = LLVMUsed.size(); i != e; ++i) { + UsedArray[i] = + llvm::ConstantExpr::getBitCast(cast<llvm::Constant>(&*LLVMUsed[i]), + Int8PtrTy); + } + + if (UsedArray.empty()) + return; + llvm::ArrayType *ATy = llvm::ArrayType::get(Int8PtrTy, UsedArray.size()); + + llvm::GlobalVariable *GV = + new llvm::GlobalVariable(*M, ATy, false, + llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), + "llvm.used"); + + GV->setSection("llvm.metadata"); +} + +// Add a stub function definition or declaration +void +AddFunction(Module *M, + GlobalValue::LinkageTypes Linkage, + const StringRef &Name, + bool isDefine) { + // Create an empty function with no arguments. + // void Name(void); + Type *RetTy = Type::getVoidTy(M->getContext()); + FunctionType *FT = FunctionType::get(RetTy, /*isVarArg=*/ false); + Function *F = Function::Create(FT, Linkage, Name, M); + if (isDefine) { + // Add a single basic block with "ret void" + BasicBlock *BB = BasicBlock::Create(F->getContext(), "", F); + BB->getInstList().push_back(ReturnInst::Create(F->getContext())); + } + AddUsedGlobal(F); +} + +// Add a stub global variable declaration or definition. +void +AddGlobalVariable(Module *M, + GlobalValue::LinkageTypes Linkage, + const StringRef &Name, + bool isTLS, + bool isDefine) { + // Use 'int' as the dummy type. + Type *Ty = Type::getInt32Ty(M->getContext()); + + Constant *InitVal = NULL; + if (isDefine) { + // Define to dummy value, 0. 
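+    // (The stub is only ever linked against, so the actual value is never used.)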
+    InitVal = Constant::getIntegerValue(Ty, APInt(32, 0));
+  }
+  GlobalVariable *GV =
+    new GlobalVariable(*M, Ty, /*isConstant=*/ false,
+                       Linkage, /*Initializer=*/ InitVal,
+                       Twine(Name), /*InsertBefore=*/ NULL,
+                       isTLS ? GlobalVariable::GeneralDynamicTLSModel :
+                               GlobalVariable::NotThreadLocal,
+                       /*AddressSpace=*/ 0);
+  AddUsedGlobal(GV);
+}
+
+// Iterate through the ObjectFile's needed libraries, and
+// add them to the module.
+void TransferLibrariesNeeded(Module *M, const ObjectFile *obj) {
+  library_iterator it = obj->begin_libraries_needed();
+  library_iterator ie = obj->end_libraries_needed();
+  error_code ec;
+  for (; it != ie; it.increment(ec)) {
+    StringRef path;
+    it->getPath(path);
+    outs() << "Adding library " << path << "\n";
+    M->addLibrary(path);
+  }
+}
+
+// Set the Module's SONAME from the ObjectFile.
+void TransferLibraryName(Module *M, const ObjectFile *obj) {
+  StringRef soname = obj->getLoadName();
+  outs() << "Setting soname to: " << soname << "\n";
+  M->setSOName(soname);
+}
+
+// Create stubs in the module for the dynamic symbols.
+void TransferDynamicSymbols(Module *M, const ObjectFile *obj) {
+  // Iterate through the dynamic symbols in the ObjectFile.
+  symbol_iterator it = obj->begin_dynamic_symbols();
+  symbol_iterator ie = obj->end_dynamic_symbols();
+  error_code ec;
+  for (; it != ie; it.increment(ec)) {
+    const SymbolRef &sym = *it;
+    StringRef Name;
+    SymbolRef::Type Type;
+    uint32_t Flags;
+
+    sym.getName(Name);
+    sym.getType(Type);
+    sym.getFlags(Flags);
+
+    // Ignore debug info and section labels.
+    if (Flags & SymbolRef::SF_FormatSpecific)
+      continue;
+
+    // Ignore local symbols.
+    if (!(Flags & SymbolRef::SF_Global))
+      continue;
+    outs() << "Transferring symbol " << Name << "\n";
+
+    bool isFunc = (Type == SymbolRef::ST_Function);
+    bool isUndef = (Flags & SymbolRef::SF_Undefined);
+    bool isTLS = (Flags & SymbolRef::SF_ThreadLocal);
+    bool isCommon = (Flags & SymbolRef::SF_Common);
+    bool isWeak = (Flags & SymbolRef::SF_Weak);
+
+    if (Type == SymbolRef::ST_Unknown) {
+      // Weak symbols can be "v" according to nm, which are definitely
+      // data, but they may also be "w", which are of unknown type.
+      // Thus there is already a mechanism to say "weak object", but not
+      // one for "weak function". Assume unknown weak symbols are functions.
+      if (isWeak) {
+        outs() << "Warning: Symbol '" << Name <<
+          "' has unknown type (weak). Assuming function.\n";
+        Type = SymbolRef::ST_Function;
+        isFunc = true;
+      } else {
+        // If it is undef, we likely don't care, since it won't be used
+        // to bind to unresolved symbols in the real pexe and real pso.
+        // Other cases seen where it is not undef: _end and __bss_start,
+        // which are markers provided by the linker script.
+        outs() << "Warning: Symbol '" << Name <<
+          "' has unknown type (isUndef=" << isUndef << "). Assuming data.\n";
+        Type = SymbolRef::ST_Data;
+        isFunc = false;
+      }
+    }
+
+    // Determine the linkage type.
+    GlobalValue::LinkageTypes Linkage;
+    if (isWeak)
+      Linkage = isUndef ? GlobalValue::ExternalWeakLinkage :
+                          GlobalValue::WeakAnyLinkage;
+    else if (isCommon)
+      Linkage = GlobalValue::CommonLinkage;
+    else
+      Linkage = GlobalValue::ExternalLinkage;
+
+    if (isFunc)
+      AddFunction(M, Linkage, Name, !isUndef);
+    else
+      AddGlobalVariable(M, Linkage, Name, isTLS, !isUndef);
+  }
+}
+
+} // namespace
+
+
+int main(int argc, const char** argv) {
+  sys::PrintStackTraceOnErrorSignal();
+  PrettyStackTraceProgram X(argc, argv);
+  LLVMContext &Context = getGlobalContext();
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
+
+  cl::ParseCommandLineOptions(argc, argv,
+                              "Portable Shared Object Stub Maker\n");
+
+  if (InputFilename.empty()) {
+    errs() << "Please specify an input filename\n";
+    return 1;
+  }
+  if (OutputFilename.empty()) {
+    errs() << "Please specify an output filename with -o\n";
+    return 1;
+  }
+
+  // Open the object file.
+  OwningPtr<MemoryBuffer> File;
+  if (MemoryBuffer::getFile(InputFilename, File)) {
+    errs() << InputFilename << ": Open failed\n";
+    return 1;
+  }
+
+  ObjectFile *obj = ObjectFile::createObjectFile(File.take());
+  if (!obj) {
+    errs() << InputFilename << ": Object type not recognized\n";
+    return 1;
+  }
+
+  // Create the new module.
+  OwningPtr<Module> M(new Module(InputFilename, Context));
+
+  // Transfer the relevant ELF information.
+  M->setOutputFormat(Module::SharedOutputFormat);
+  TransferLibrariesNeeded(M.get(), obj);
+  TransferLibraryName(M.get(), obj);
+  TransferDynamicSymbols(M.get(), obj);
+  EmitLLVMUsed(M.get());
+
+  // Verify the module.
+  std::string Err;
+  if (verifyModule(*M.get(), ReturnStatusAction, &Err)) {
+    errs() << "Module created is invalid:\n";
+    errs() << Err;
+    return 1;
+  }
+
+  // Write the module to a file.
+  std::string ErrorInfo;
+  OwningPtr<tool_output_file> Out(
+    new tool_output_file(OutputFilename.c_str(), ErrorInfo,
+                         raw_fd_ostream::F_Binary));
+  if (!ErrorInfo.empty()) {
+    errs() << ErrorInfo << '\n';
+    return 1;
+  }
+  WriteBitcodeToFile(M.get(), Out->os());
+  Out->keep();
+  return 0;
+}
diff --git a/utils/Makefile b/utils/Makefile
index ecb30bed7c..d117b5a87f 100644
--- a/utils/Makefile
+++ b/utils/Makefile
@@ -11,6 +11,15 @@ LEVEL = ..
 PARALLEL_DIRS := FileCheck FileUpdate TableGen PerfectShuffle \
 	count fpcmp llvm-lit not unittest
 
+ifeq ($(NACL_SANDBOX),1)
+  # In sandboxed mode, just build the bare minimum.
+  # Note: TableGen is usually built twice:
+  #   * once with the host compiler
+  #   * once more with the "given" compiler
+  # Here we just disable that second build.
+  PARALLEL_DIRS :=
+endif
+
 EXTRA_DIST := check-each-file codegen-diff countloc.sh \
 	DSAclean.py DSAextract.py emacs findsym.pl GenLibDeps.pl \
 	getsrcs.sh llvmdo llvmgrep llvm-native-gcc \
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index 8b292b9572..ab2b002b19 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -316,6 +316,12 @@ void CodeGenTarget::ComputeInstrsByEnum() const {
     "BUNDLE",
     "LIFETIME_START",
     "LIFETIME_END",
+    // @LOCALMOD-BEGIN
+    "BUNDLE_ALIGN_START",
+    "BUNDLE_ALIGN_END",
+    "BUNDLE_LOCK",
+    "BUNDLE_UNLOCK",
+    // @LOCALMOD-END
     0
   };
   const DenseMap<const Record*, CodeGenInstruction*> &Insts = getInstructions();
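For reference, the following is a minimal, hypothetical sketch (an editorial illustration, not part of the change above) that distills the module-building sequence pso-stub performs: one trivial function stub, one dummy global, verification, and bitcode output. It assumes the same LLVM revision and headers that pso-stub.cpp itself uses; the file name stub-sketch.cpp, the output name stub-sketch.bc, and the symbols "foo" and "bar" are placeholders.

// stub-sketch.cpp -- distilled, hypothetical version of pso-stub's per-symbol
// work, using only APIs that already appear in pso-stub.cpp above.
#include <string>
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/OwningPtr.h"

using namespace llvm;

int main() {
  LLVMContext &C = getGlobalContext();
  OwningPtr<Module> M(new Module("stub-sketch", C));

  // Defined function -> trivial "ret void" stub (cf. AddFunction above).
  FunctionType *FT = FunctionType::get(Type::getVoidTy(C), /*isVarArg=*/ false);
  Function *F = Function::Create(FT, GlobalValue::ExternalLinkage, "foo", M.get());
  BasicBlock *BB = BasicBlock::Create(C, "", F);
  BB->getInstList().push_back(ReturnInst::Create(C));

  // Defined data symbol -> dummy "int bar = 0" (cf. AddGlobalVariable above).
  Type *Int32Ty = Type::getInt32Ty(C);
  new GlobalVariable(*M, Int32Ty, /*isConstant=*/ false,
                     GlobalValue::ExternalLinkage,
                     Constant::getIntegerValue(Int32Ty, APInt(32, 0)), "bar");

  // Verify, then write bitcode, mirroring the tail of pso-stub's main().
  std::string Err;
  if (verifyModule(*M, ReturnStatusAction, &Err)) {
    errs() << Err;
    return 1;
  }
  std::string ErrorInfo;
  tool_output_file Out("stub-sketch.bc", ErrorInfo, raw_fd_ostream::F_Binary);
  if (!ErrorInfo.empty()) {
    errs() << ErrorInfo << '\n';
    return 1;
  }
  WriteBitcodeToFile(M.get(), Out.os());
  Out.keep();
  return 0;
}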