Apply after-merge fixes to return to working state.

author: Eli Bendersky <eliben@chromium.org> 2013-03-11 15:38:11 -0700
committer: Eli Bendersky <eliben@chromium.org> 2013-03-20 14:49:21 -0700
commit: d41567d2ffd3413600162653c08b2365bd5bcbbf (patch)
tree: aa1c212bcf816f4011315b80826acfb85ae7d9f3
parent: 23c00401dad33ca247d2818e71540079bed63c5b (diff)
33 files changed, 202 insertions, 1682 deletions
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index f8e390c753..b79f9f9816 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -352,16 +352,6 @@ void TargetPassConfig::addIRPasses() {
   addPass(createTypeBasedAliasAnalysisPass());
   addPass(createBasicAliasAnalysisPass());
 
-  // @LOCALMOD-START
-  addPass(createNaClCcRewritePass(TM->getTargetLowering()));
-  // TODO: consider adding a cleanup pass, e.g. constant propagation
-  // Note: we run this before the verfier step because it may cause
-  // a *temporary* inconsistency:
-  //   A function may have been rewritting before we are rewriting
-  //   its callers - which would lead to a parameter mismatch complaint
-  //   from the verifier.
-  // @LOCALMOD-END
-
   // Before running any passes, run the verifier to determine if the input
   // coming from the front-end and/or optimizer is valid.
   if (!DisableVerify)
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
deleted file mode 100644
index e5ec0b83b8..0000000000
--- a/lib/Linker/LinkArchives.cpp
+++ /dev/null
@@ -1,213 +0,0 @@
-//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains routines to handle linking together LLVM bitcode files,
-// and to handle annoying things like static libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Linker.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/Bitcode/Archive.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h" // @LOCALMOD
-
-#include <memory>
-#include <set>
-using namespace llvm;
-
-// @LOCALMOD-START
-// NOTE: this has a similar effect as
-//        tools/llvm/llvm-preserve.ll
-// which in turn is similar to the GNUS's attribute((used))
-// TODO(robertm): This is a little hackish for now
-static cl::list<std::string>
-UndefList("referenced-list", cl::value_desc("list"),
-          cl::desc("A list of symbols assumed to be referenced externally"),
-          cl::CommaSeparated);
-// @LOCALMOD-END
-  
-/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
-/// exist in an LLVM module. This is a bit tricky because there may be two
-/// symbols with the same name but different LLVM types that will be resolved to
-/// each other but aren't currently (thus we need to treat it as resolved).
-///
-/// Inputs:
-///  M - The module in which to find undefined symbols.
-///
-/// Outputs:
-///  UndefinedSymbols - A set of C++ strings containing the name of all
-///                     undefined symbols.
-///
-static void
-GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
-  std::set<std::string> DefinedSymbols;
-  UndefinedSymbols.clear();
-  // @LOCALMOD-START
-  UndefinedSymbols.insert(UndefList.begin(), UndefList.end());
-  // @LOCALMOD-END
-  
-  // If the program doesn't define a main, try pulling one in from a .a file.
-  // This is needed for programs where the main function is defined in an
-  // archive, such f2c'd programs.
-  Function *Main = M->getFunction("main");
-  if (Main == 0 || Main->isDeclaration())
-    UndefinedSymbols.insert("main");
-
-  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
-    if (I->hasName()) {
-      if (I->isDeclaration())
-        UndefinedSymbols.insert(I->getName());
-      else if (!I->hasLocalLinkage()) {
-        assert(!I->hasDLLImportLinkage()
-               && "Found dllimported non-external symbol!");
-        DefinedSymbols.insert(I->getName());
-      }      
-    }
-
-  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
-       I != E; ++I)
-    if (I->hasName()) {
-      if (I->isDeclaration())
-        UndefinedSymbols.insert(I->getName());
-      else if (!I->hasLocalLinkage()) {
-        assert(!I->hasDLLImportLinkage()
-               && "Found dllimported non-external symbol!");
-        DefinedSymbols.insert(I->getName());
-      }      
-    }
-
-  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
-       I != E; ++I)
-    if (I->hasName())
-      DefinedSymbols.insert(I->getName());
-
-  // Prune out any defined symbols from the undefined symbols set...
-  for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
-       I != UndefinedSymbols.end(); )
-    if (DefinedSymbols.count(*I))
-      UndefinedSymbols.erase(I++);  // This symbol really is defined!
-    else
-      ++I; // Keep this symbol in the undefined symbols list
-}
-
-/// LinkInArchive - opens an archive library and link in all objects which
-/// provide symbols that are currently undefined.
-///
-/// Inputs:
-///  Filename - The pathname of the archive.
-///
-/// Return Value:
-///  TRUE  - An error occurred.
-///  FALSE - No errors.
-bool
-Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
-  // Make sure this is an archive file we're dealing with
-  if (!Filename.isArchive())
-    return error("File '" + Filename.str() + "' is not an archive.");
-
-  // Open the archive file
-  verbose("Linking archive file '" + Filename.str() + "'");
-
-  // Find all of the symbols currently undefined in the bitcode program.
-  // If all the symbols are defined, the program is complete, and there is
-  // no reason to link in any archive files.
-  std::set<std::string> UndefinedSymbols;
-  GetAllUndefinedSymbols(Composite, UndefinedSymbols);
-
-  if (UndefinedSymbols.empty()) {
-    verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
-    return false;  // No need to link anything in!
-  }
-
-  std::string ErrMsg;
-  std::auto_ptr<Archive> AutoArch (
-    Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg));
-
-  Archive* arch = AutoArch.get();
-
-  if (!arch)
-    return error("Cannot read archive '" + Filename.str() +
-                 "': " + ErrMsg);
-  if (!arch->isBitcodeArchive()) {
-    is_native = true;
-    return false;
-  }
-  is_native = false;
-
-  // Save a set of symbols that are not defined by the archive. Since we're
-  // entering a loop, there's no point searching for these multiple times. This
-  // variable is used to "set_subtract" from the set of undefined symbols.
-  std::set<std::string> NotDefinedByArchive;
-
-  // Save the current set of undefined symbols, because we may have to make
-  // multiple passes over the archive:
-  std::set<std::string> CurrentlyUndefinedSymbols;
-
-  do {
-    CurrentlyUndefinedSymbols = UndefinedSymbols;
-
-    // Find the modules we need to link into the target module.  Note that arch
-    // keeps ownership of these modules and may return the same Module* from a
-    // subsequent call.
-    SmallVector<Module*, 16> Modules;
-    if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
-      return error("Cannot find symbols in '" + Filename.str() + 
-                   "': " + ErrMsg);
-
-    // If we didn't find any more modules to link this time, we are done
-    // searching this archive.
-    if (Modules.empty())
-      break;
-
-    // Any symbols remaining in UndefinedSymbols after
-    // findModulesDefiningSymbols are ones that the archive does not define. So
-    // we add them to the NotDefinedByArchive variable now.
-    NotDefinedByArchive.insert(UndefinedSymbols.begin(),
-        UndefinedSymbols.end());
-
-    // Loop over all the Modules that we got back from the archive
-    for (SmallVectorImpl<Module*>::iterator I=Modules.begin(), E=Modules.end();
-         I != E; ++I) {
-
-      // Get the module we must link in.
-      std::string moduleErrorMsg;
-      Module* aModule = *I;
-      if (aModule != NULL) {
-        if (aModule->MaterializeAll(&moduleErrorMsg))
-          return error("Could not load a module: " + moduleErrorMsg);
-
-        verbose("  Linking in module: " + aModule->getModuleIdentifier());
-
-        // Link it in
-        if (LinkInModule(aModule, &moduleErrorMsg))
-          return error("Cannot link in module '" +
-                       aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
-      } 
-    }
-    
-    // Get the undefined symbols from the aggregate module. This recomputes the
-    // symbols we still need after the new modules have been linked in.
-    GetAllUndefinedSymbols(Composite, UndefinedSymbols);
-
-    // At this point we have two sets of undefined symbols: UndefinedSymbols
-    // which holds the undefined symbols from all the modules, and
-    // NotDefinedByArchive which holds symbols we know the archive doesn't
-    // define. There's no point searching for symbols that we won't find in the
-    // archive so we subtract these sets.
-    set_subtract(UndefinedSymbols, NotDefinedByArchive);
-
-    // If there's no symbols left, no point in continuing to search the
-    // archive.
-    if (UndefinedSymbols.empty())
-      break;
-  } while (CurrentlyUndefinedSymbols != UndefinedSymbols);
-
-  return false;
-}
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
deleted file mode 100644
index 0ab551d14d..0000000000
--- a/lib/Linker/LinkItems.cpp
+++ /dev/null
@@ -1,242 +0,0 @@
-//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains routines to handle linking together LLVM bitcode files,
-// and to handle annoying things like static libraries.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Linker.h"
-#include "llvm/Bitcode/ReaderWriter.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
-#include "llvm/Support/system_error.h"
-using namespace llvm;
-
-// LinkItems - This function is the main entry point into linking. It takes a
-// list of LinkItem which indicates the order the files should be linked and
-// how each file should be treated (plain file or with library search). The
-// function only links bitcode and produces a result list of items that are
-// native objects. 
-bool
-Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
-  // Clear the NativeItems just in case
-  NativeItems.clear();
-
-  // For each linkage item ...
-  for (ItemList::const_iterator I = Items.begin(), E = Items.end();
-       I != E; ++I) {
-    if (I->second) {
-      // Link in the library suggested.
-      bool is_native = false;
-      if (LinkInLibrary(I->first, is_native))
-        return true;
-      if (is_native)
-        NativeItems.push_back(*I);
-    } else {
-      // Link in the file suggested
-      bool is_native = false;
-      if (LinkInFile(sys::Path(I->first), is_native))
-        return true;
-      if (is_native)
-        NativeItems.push_back(*I);
-    }
-  }
-
-  // @LOCALMOD-BEGIN
-  // At this point we have processed all the link items provided to us. Since
-  // we have an aggregated module at this point, the dependent libraries in
-  // that module should also be aggregated with duplicates eliminated. This is
-  // now the time to process the dependent libraries to resolve any remaining
-  // symbols.
-  bool is_native;
-  for (Module::lib_iterator I = Composite->lib_begin(),
-         E = Composite->lib_end(); I != E; ++I) {
-    if(LinkInLibrary(*I, is_native))
-      return true;
-    if (is_native)
-      NativeItems.push_back(std::make_pair(*I, true));
-  }
-  // @LOCALMOD-END
-  return false;
-}
-
-
-/// LinkInLibrary - links one library into the HeadModule.
-///
-bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) {
-  is_native = false;
-  // Determine where this library lives.
-  sys::Path Pathname = FindLib(Lib);
-  if (Pathname.isEmpty())
-    return error("Cannot find library '" + Lib.str() + "'");
-
-  // If its an archive, try to link it in
-  std::string Magic;
-  Pathname.getMagicNumber(Magic, 64);
-  switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
-    default: llvm_unreachable("Bad file type identification");
-    case sys::Unknown_FileType:
-      return warning("Supposed library '" + Lib.str() + "' isn't a library.");
-
-    case sys::Bitcode_FileType:
-      // LLVM ".so" file.
-      if (LinkInFile(Pathname, is_native))
-        return true;
-      break;
-
-    case sys::Archive_FileType:
-      if (LinkInArchive(Pathname, is_native))
-        return error("Cannot link archive '" + Pathname.str() + "'");
-      break;
-
-    case sys::ELF_Relocatable_FileType:
-    case sys::ELF_SharedObject_FileType:
-    case sys::Mach_O_Object_FileType:
-    case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
-    case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
-    case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
-    case sys::COFF_FileType:
-      is_native = true;
-      break;
-  }
-  return false;
-}
-
-/// LinkLibraries - takes the specified library files and links them into the
-/// main bitcode object file.
-///
-/// Inputs:
-///  Libraries  - The list of libraries to link into the module.
-///
-/// Return value:
-///  FALSE - No error.
-///  TRUE  - Error.
-///
-bool Linker::LinkInLibraries(const std::vector<std::string> &Libraries) {
-
-  // Process the set of libraries we've been provided.
-  bool is_native = false;
-  for (unsigned i = 0; i < Libraries.size(); ++i)
-    if (LinkInLibrary(Libraries[i], is_native))
-      return true;
-  // @LOCALMOD-BEGIN
-  // At this point we have processed all the libraries provided to us. Since
-  // we have an aggregated module at this point, the dependent libraries in
-  // that module should also be aggregated with duplicates eliminated. This is
-  // now the time to process the dependent libraries to resolve any remaining
-  // symbols.
-  const Module::LibraryListType& DepLibs = Composite->getLibraries();
-  for (Module::LibraryListType::const_iterator I = DepLibs.begin(),
-         E = DepLibs.end(); I != E; ++I)
-    if (LinkInLibrary(*I, is_native))
-      return true;
-  // @LOCALMOD-END
-  return false;
-}
-
-/// LinkInFile - opens a bitcode file and links in all objects which
-/// provide symbols that are currently undefined.
-///
-/// Inputs:
-///  File - The pathname of the bitcode file.
-///
-/// Outputs:
-///  ErrorMessage - A C++ string detailing what error occurred, if any.
-///
-/// Return Value:
-///  TRUE  - An error occurred.
-///  FALSE - No errors.
-///
-bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
-  is_native = false;
-  
-  // Check for a file of name "-", which means "read standard input"
-  if (File.str() == "-") {
-    std::auto_ptr<Module> M;
-    OwningPtr<MemoryBuffer> Buffer;
-    error_code ec;
-    if (!(ec = MemoryBuffer::getSTDIN(Buffer))) {
-      if (!Buffer->getBufferSize()) {
-        Error = "standard input is empty";
-      } else {
-        M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error));
-        if (M.get())
-          if (!LinkInModule(M.get(), &Error))
-            return false;
-      }
-    }
-    return error("Cannot link stdin: " + ec.message());
-  }
-
-  // Determine what variety of file it is.
-  std::string Magic;
-  if (!File.getMagicNumber(Magic, 64))
-    return error("Cannot find linker input '" + File.str() + "'");
-
-  switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
-    default: llvm_unreachable("Bad file type identification");
-    case sys::Unknown_FileType:
-      return warning("Ignoring file '" + File.str() + 
-                   "' because does not contain bitcode.");
-
-    case sys::Archive_FileType:
-      // A user may specify an ar archive without -l, perhaps because it
-      // is not installed as a library. Detect that and link the archive.
-      if (LinkInArchive(File, is_native))
-        return true;
-      break;
-
-    case sys::Bitcode_FileType: {
-      verbose("Linking bitcode file '" + File.str() + "'");
-      std::auto_ptr<Module> M(LoadObject(File));
-      if (M.get() == 0)
-        return error("Cannot load file '" + File.str() + "': " + Error);
-      if (LinkInModule(M.get(), &Error))
-        return error("Cannot link file '" + File.str() + "': " + Error);
-
-      verbose("Linked in file '" + File.str() + "'");
-      break;
-    }
-
-    case sys::ELF_Relocatable_FileType:
-    case sys::ELF_SharedObject_FileType:
-    case sys::Mach_O_Object_FileType:
-    case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
-    case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
-    case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType:
-    case sys::COFF_FileType:
-      is_native = true;
-      break;
-  }
-  return false;
-}
-
-/// LinkFiles - takes a module and a list of files and links them all together.
-/// It locates the file either in the current directory, as its absolute
-/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH.
-///
-/// Inputs:
-///  Files      - A vector of sys::Path indicating the LLVM bitcode filenames
-///               to be linked.  The names can refer to a mixture of pure LLVM
-///               bitcode files and archive (ar) formatted files.
-///
-/// Return value:
-///  FALSE - No errors.
-///  TRUE  - Some error occurred.
-///
-bool Linker::LinkInFiles(const std::vector<sys::Path> &Files) {
-  bool is_native;
-  for (unsigned i = 0; i < Files.size(); ++i)
-    if (LinkInFile(Files[i], is_native))
-      return true;
-  return false;
-}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index e835eea7df..1dff3f2977 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -176,7 +176,8 @@ void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
 
 void MCObjectStreamer::EmitInstruction(const MCInst &Inst) {
   // @LOCALMOD-BEGIN
-  if (getAssembler().getBackend().CustomExpandInst(Inst, *this)) {
+  if (getAssembler().isBundlingEnabled() &&
+      getAssembler().getBackend().CustomExpandInst(Inst, *this)) {
     return;
   }
   // @LOCALMOD-END
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 65b334bac4..c48df8a96f 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1824,10 +1824,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     // Non-Darwin binutils don't yet support the "trap" mnemonic.
     // FIXME: Remove this special case when they do.
     if (!Subtarget->isTargetDarwin()) {
-      // @LOCALMOD-START
-      //.long 0xe7fedef0 @ trap
-      uint32_t Val = 0xe7fedef0UL;
-      // @LOCALMOD-END
+      //.long 0xe7ffdefe @ trap
+      uint32_t Val = 0xe7ffdefeUL;
       OutStreamer.AddComment("trap");
       OutStreamer.EmitIntValue(Val, 4);
       return;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 1e54c3bb74..0501ee4c1d 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -74,7 +74,7 @@ void ARMSubtarget::initializeEnvironment() {
   HasVFPv4 = false;
   HasNEON = false;
   UseNEONForSinglePrecisionFP = false;
-  UseInlineJumpTables(!NoInlineJumpTables);
+  UseInlineJumpTables = !NoInlineJumpTables;
   UseMulOps = UseFusedMulOps;
   SlowFPVMLx = false;
   HasVMLxForwarding = false;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index c39d3fad3b..6c080408dd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -49,7 +49,6 @@ namespace {
     virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
                                   bool IsPCRel, bool IsRelocWithSymbol,
                                   int64_t Addend) const;
-    virtual unsigned getEFlags() const;
     virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
                                            const MCValue &Target,
                                            const MCFragment &F,
@@ -68,22 +67,6 @@ MipsELFObjectWriter::MipsELFObjectWriter(bool _is64Bit, uint8_t OSABI,
 
 MipsELFObjectWriter::~MipsELFObjectWriter() {}
 
-// FIXME: get the real EABI Version from the Subtarget class.
-unsigned MipsELFObjectWriter::getEFlags() const {
-
-  // FIXME: We can't tell if we are PIC (dynamic) or CPIC (static)
-  unsigned Flag = ELF::EF_MIPS_NOREORDER;
-
-  if (is64Bit())
-    Flag |= ELF::EF_MIPS_ARCH_64R2;
-  else
-    Flag |= ELF::EF_MIPS_ARCH_32R2;
-  /* @LOCLAMOD-START */
-  if (RelocModelOption == Reloc::PIC_ || RelocModelOption == Reloc::Default)
-    Flag |= ELF::EF_MIPS_PIC;
-  /* @LOCLAMOD-END */
-  return Flag;
-}
 
 const MCSymbol *MipsELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
                                                     const MCValue &Target,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index c77be4f743..ea29621ae2 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -133,11 +133,14 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
   Triple TheTriple(TT);
 
   // @LOCALMOD-BEGIN
-  MCStreamer *Streamer = createELFStreamer(Ctx, MAB, _OS, _Emitter,
-                                           RelaxAll, NoExecStack);
-  if (TheTriple.isOSNaCl())
+  if (TheTriple.isOSNaCl()) {
+    MCStreamer *Streamer = createELFStreamer(Ctx, MAB, _OS, _Emitter,
+                                             RelaxAll, NoExecStack);
     Streamer->EmitBundleAlignMode(4);
-  return Streamer;
+    return Streamer;
+  } else {
+    return createMipsELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+  }
   // @LOCALMOD-END
 }
 
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 58a009ff02..5e0a80f416 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,11 +13,11 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "mips-asm-printer"
-#include "MipsAsmPrinter.h"
 #include "InstPrinter/MipsInstPrinter.h"
 #include "MCTargetDesc/MipsBaseInfo.h"
 #include "MCTargetDesc/MipsELFStreamer.h"
 #include "Mips.h"
+#include "MipsAsmPrinter.h"
 #include "MipsInstrInfo.h"
 #include "MipsMCInstLower.h"
 #include "llvm/ADT/SmallString.h"
@@ -67,7 +67,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
-  // @LOCALMOD-START
+  // @LOCALMOD-BEGIN:
   // Do any auto-generated pseudo lowerings.
   if (emitPseudoExpansionLowering(OutStreamer, MI))
     return;
@@ -81,15 +81,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
     if (emitPseudoExpansionLowering(OutStreamer, &*I))
       continue;
 
-    // The inMips16Mode() test is not permanent.
-    // Some instructions are marked as pseudo right now which
-    // would make the test fail for the wrong reason but
-    // that will be fixed soon. We need this here because we are
-    // removing another test for this situation downstream in the
-    // callchain.
-    //
-    if (I->isPseudo() && !Subtarget->inMips16Mode())
-      llvm_unreachable("Pseudo opcode found in EmitInstruction()");
+    // @LOCALMOD: the I->isPseudo() assertion here has been removed because
+    // we may have SFI pseudos in I.
 
     MCInst TmpInst0;
     MCInstLowering.Lower(I, TmpInst0);
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index af81426bc0..49475d1740 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -70,7 +70,6 @@ namespace {
       return "Mips Delay Slot Filler";
     }
 
-    bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
     bool runOnMachineFunction(MachineFunction &F) {
       if (SkipDelaySlotFiller)
         return false;
@@ -169,7 +168,6 @@ bool RegDefsUses::isRegInSet(const BitVector &RegSet, unsigned Reg) const {
 /// We assume there is only one delay slot per delayed instruction.
 bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   bool Changed = false;
-  LastFiller = MBB.instr_end();
 
   for (Iter I = MBB.begin(); I != MBB.end(); ++I) {
     if (!I->hasDelaySlot())
@@ -192,7 +190,6 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
   }
 
   return Changed;
-
 }
 
 /// createMipsDelaySlotFillerPass - Returns a pass that fills in delay
@@ -206,11 +203,9 @@ extern bool IsDangerousLoad(const MachineInstr &MI, int *AddrIdx);
 extern bool IsDangerousStore(const MachineInstr &MI, int *AddrIdx);
 // @LOCALMOD-END
 
-bool Filler::findDelayInstr(MachineBasicBlock &MBB,
-                            InstrIter slot,
-                            InstrIter &Filler) {
-  SmallSet<unsigned, 32> RegDefs;
-  SmallSet<unsigned, 32> RegUses;
+bool Filler::findDelayInstr(MachineBasicBlock &MBB, Iter Slot,
+                            Iter &Filler) const {
+  RegDefsUses RegDU(TM);
 
   RegDU.init(*Slot);
 
@@ -223,17 +218,22 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
       continue;
 
     // @LOCALMOD-START - Don't put in delay slot instructions that could be masked.
-    int Dummy;
-    if (terminateSearch(*I) || (Triple(TM.getTargetTriple()).isOSNaCl() &&
-                                (IsDangerousLoad(*FI, &Dummy)
-                                || IsDangerousStore(*FI, &Dummy)
-                                || FI->modifiesRegister(Mips::SP, TM.getRegisterInfo()))))
-      break;
-    // @LOCALMOD-END
     //
     // Should not allow:
     // ERET, DERET or WAIT, PAUSE. Need to add these to instruction
     // list. TBD.
+    if (Triple(TM.getTargetTriple()).isOSNaCl()) {
+      int Dummy;
+      Iter FI(llvm::next(I).base());
+      if (terminateSearch(*I) || (IsDangerousLoad(*FI, &Dummy)
+                              || IsDangerousStore(*FI, &Dummy)
+                              || FI->modifiesRegister(Mips::SP, TM.getRegisterInfo())))
+        break;
+    } else {
+      if (terminateSearch(*I))
+        break;
+    }
+    // @LOCALMOD-END
 
     if (delayHasHazard(*I, SawLoad, SawStore, RegDU))
       continue;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 7c61318aac..78c74ef879 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -392,20 +392,12 @@ bool MipsDAGToDAGISel::selectAddrRegImm(SDValue Addr, SDValue &Base,
       }
     }
   }
+
+  return false;
 }
 
 bool MipsDAGToDAGISel::selectAddrDefault(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) const {
-  // @LOCALMOD-START
-  // If an indexed floating point load/store can be emitted, return false.
-  const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
-
-  if (LS &&
-     (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
-      Subtarget.hasFPIdx())
-    return false;
-  // @LOCALMOD-END
-
   Base = Addr;
   Offset = CurDAG->getTargetConstant(0, Addr.getValueType());
   return true;
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 73074f5b58..d22400d211 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -319,23 +319,23 @@ let Predicates = [NotN64, NotMips64, HasStdEnc] in {
 }
 
 // Indexed loads and stores.
-let Predicates = [HasFPIdx, IsNotNaCl/*@LOCALMOD*/] in {
+let Predicates = [HasFPIdx, HasStdEnc, IsNotNaCl/*@LOCALMOD*/] in {
   def LWXC1 : LWXC1_FT<"lwxc1", FGR32, CPURegs, IILoad, load>, LWXC1_FM<0>;
   def SWXC1 : SWXC1_FT<"swxc1", FGR32, CPURegs, IIStore, store>, SWXC1_FM<8>;
 }
 
-let Predicates = [HasMips32r2, NotMips64, IsNotNaCl/*@LOCALMOD*/] in {
+let Predicates = [HasMips32r2, NotMips64, HasStdEnc, IsNotNaCl/*@LOCALMOD*/] in {
   def LDXC1 : LWXC1_FT<"ldxc1", AFGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
   def SDXC1 : SWXC1_FT<"sdxc1", AFGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
 }
 
-let Predicates = [HasMips64, NotN64, IsNotNaCl/*@LOCALMOD*/], DecoderNamespace="Mips64" in {
+let Predicates = [HasMips64, NotN64, HasStdEnc, IsNotNaCl/*@LOCALMOD*/], DecoderNamespace="Mips64" in {
   def LDXC164 : LWXC1_FT<"ldxc1", FGR64, CPURegs, IILoad, load>, LWXC1_FM<1>;
   def SDXC164 : SWXC1_FT<"sdxc1", FGR64, CPURegs, IIStore, store>, SWXC1_FM<9>;
 }
 
 // n64
-let Predicates = [IsN64, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in {
+let Predicates = [IsN64, HasStdEnc, IsNotNaCl/*@LOCALMOD*/], isCodeGenOnly=1 in {
   def LWXC1_P8 : LWXC1_FT<"lwxc1", FGR32, CPU64Regs, IILoad, load>, LWXC1_FM<0>;
   def LDXC164_P8 : LWXC1_FT<"ldxc1", FGR64, CPU64Regs, IILoad, load>,
                    LWXC1_FM<1>;
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index e0d884dfda..d836975eb7 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -164,4 +164,3 @@ void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
   }
 }
 
-
diff --git a/lib/Target/Mips/MipsNaClRewritePass.cpp b/lib/Target/Mips/MipsNaClRewritePass.cpp
index 5e4e5e3b8f..d4407e835e 100644
--- a/lib/Target/Mips/MipsNaClRewritePass.cpp
+++ b/lib/Target/Mips/MipsNaClRewritePass.cpp
@@ -112,6 +112,7 @@ void MipsNaClRewritePass::SandboxStackChange(MachineBasicBlock &MBB,
 
   // Get to next instr (one + to get the original, and one more + to get past).
   MachineBasicBlock::iterator MBBINext = (MBBI++);
+  (void) MBBINext;
   MachineBasicBlock::iterator MBBINext2 = (MBBI++);
 
   BuildMI(MBB, MBBINext2, MI.getDebugLoc(),
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 370eebe1f3..2039b7d210 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -77,18 +77,21 @@ X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
 void X86ELFMCAsmInfo::anchor() { }
 
 X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
-  // @LOCALMOD-BEGIN s/gnux32/nacl/
   bool is64Bit = T.getArch() == Triple::x86_64;
+  bool isX32 = T.getEnvironment() == Triple::GNUX32;
+
+  // @LOCALMOD-BEGIN(eliben)
+  // Until Nacl implies x32, we add &&!isNaCl in the PointerSize condition
   bool isNaCl = T.isOSNaCl();
 
   // For ELF, x86-64 pointer size depends on the ABI.
   // For x86-64 without the x32 ABI, pointer size is 8. For x86 and for x86-64
   // with the x32 ABI, pointer size remains the default 4.
-  PointerSize = (is64Bit && !isNaCl) ? 8 : 4;
+  PointerSize = (is64Bit && !isX32 && !isNaCl) ? 8 : 4;
+  // @LOCALMOD-END
 
   // OTOH, stack slot size is always 8 for x86-64, even with the x32 ABI.
   CalleeSaveStackSlotSize = is64Bit ? 8 : 4;
-  // @LOCALMOD-END
 
   AssemblerDialect = AsmWriterFlavor;
 
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index efacf5be4a..89e3538558 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -341,6 +341,74 @@ bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
   return true;
 }
 
+/// @LOCALMOD-BEGIN
+/// isLegalAddressingModeForNaCl - Determine if the addressing mode AM is
+/// legal for NaCl translation.  Returns true when it is legal; for any
+/// subtarget that is not NaCl64 the answer is always true.  Before each
+/// false return NumFastIselNaClFailures is bumped, and the caller is
+/// expected to reject the instruction for fast-ISel code generation.
+///
+/// The tests are translated from the corresponding logic in
+/// X86DAGToDAGISel::LegalizeAddressingModeForNaCl(), which can't be used
+/// directly due to the X86AddressMode vs X86ISelAddressMode types.  Keep
+/// the two in sync; the original conditions are quoted in the comments.
+static bool isLegalAddressingModeForNaCl(const X86Subtarget *Subtarget,
+                                         const X86AddressMode &AM) {
+  if (Subtarget->isTargetNaCl64()) {
+    // A RIP-based address is accepted outright; this is the equivalent of
+    // X86ISelAddressMode::isRIPRelative() being true.
+    if (AM.BaseType == X86AddressMode::RegBase &&
+        AM.Base.Reg == X86::RIP)
+      return true;
+
+    // Reject a negative Disp with no register and no global, equivalent to
+    // (!AM.hasBaseOrIndexReg() &&
+    //  !AM.hasSymbolicDisplacement() &&
+    //  AM.Disp < 0)
+    if (!((AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg) ||
+          AM.IndexReg) &&
+        !AM.GV &&
+        AM.Disp < 0) {
+      ++NumFastIselNaClFailures;
+      return false;
+    }
+
+    // At this point in the LegalizeAddressingModeForNaCl() code, it
+    // normalizes an addressing mode with a base register and no index
+    // register into an equivalent mode with an index register and no
+    // base register.  Since we don't modify AM, we may have to check
+    // both the base and index register fields in the remainder of the
+    // tests.
+
+    // Reject frame-index/global + register + positive Disp, equivalent to
+    // ((AM.BaseType == X86ISelAddressMode::FrameIndexBase || AM.GV || AM.CP) &&
+    //   AM.IndexReg.getNode() &&
+    //   AM.Disp > 0)
+    // Note: X86AddressMode doesn't have a CP analogue
+    if ((AM.BaseType == X86AddressMode::FrameIndexBase || AM.GV) &&
+        ((AM.BaseType == X86AddressMode::RegBase && AM.Base.Reg) ||
+         AM.IndexReg) &&
+        AM.Disp > 0) {
+      ++NumFastIselNaClFailures;
+      return false;
+    }
+
+    // Reject RegBase modes that set both base and index, equivalent to
+    // ((AM.BaseType == X86ISelAddressMode::RegBase) &&
+    //  AM.Base_Reg.getNode() &&
+    //  AM.IndexReg.getNode())
+    if ((AM.BaseType == X86AddressMode::RegBase) &&
+        AM.Base.Reg &&
+        AM.IndexReg) {
+      ++NumFastIselNaClFailures;
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// @LOCALMOD-END
 /// X86SelectAddress - Attempt to fill in an address from the given value.
 ///
 /// @LOCALMOD-BEGIN
@@ -870,7 +938,6 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
     unsigned CopyTo = Subtarget->has64BitPointers() ? X86::RAX : X86::EAX;
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
             CopyTo).addReg(Reg);
-    MRI.addLiveOut(CopyTo);
     // @LOCALMOD-END
   }
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 22a7b8d022..c2bb675235 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1695,20 +1695,21 @@ X86TargetLowering::LowerReturn(SDValue Chain,
            "SRetReturnReg should have been set in LowerFormalArguments().");
     SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
 
-    // @LOCALMOD-START
+    unsigned RetValReg = Subtarget->isTarget64BitILP32() ? X86::EAX : X86::RAX;
+    // @LOCALMOD-BEGIN
     if (Subtarget->isTargetNaCl()) {
       // NaCl 64 uses 32-bit pointers, so there might be some zero-ext needed.
       SDValue Zext = DAG.getZExtOrTrunc(Val, dl, MVT::i64);
       Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Zext, Flag);
     } else {
-      Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
+      Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
     }
     // @LOCALMOD-END
 
     Flag = Chain.getValue(1);
 
-    // RAX now acts like a return value.
-    MRI.addLiveOut(X86::RAX);
+    // RAX/EAX now acts like a return value.
+    RetOps.push_back(DAG.getRegister(RetValReg, MVT::i64));
   }
 
   RetOps[0] = Chain;  // Update chain.
@@ -2068,9 +2069,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
     X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
     unsigned Reg = FuncInfo->getSRetReturnReg();
     if (!Reg) {
-      // @LOCALMOD
-      Reg = MF.getRegInfo().createVirtualRegister(
-          getRegClassFor(getPointerTy()));
+      MVT PtrTy = getPointerTy();
+      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
       FuncInfo->setSRetReturnReg(Reg);
     }
     SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
@@ -2684,8 +2684,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     // This isn't right, although it's probably harmless on x86; liveouts
     // should be computed from returns not tail calls.  Consider a void
     // function making a tail call to a function returning int.
-    return DAG.getNode(X86ISD::TC_RETURN, dl,
-                       NodeTys, &Ops[0], Ops.size());
+    return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
   }
 
   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
@@ -7634,8 +7633,8 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
 static SDValue
 GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
            SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
-           unsigned char OperandFlags,
-           unsigned Opcode = X86ISD::TLSADDR) { // @LOCALMOD
+           unsigned char OperandFlags, bool LocalDynamic = false,
+           unsigned Opcode = ISD::DELETED_NODE) { // @LOCALMOD
   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   DebugLoc dl = GA->getDebugLoc();
@@ -7644,15 +7643,22 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
                                            GA->getOffset(),
                                            OperandFlags);
 
-  X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
-                                           : X86ISD::TLSADDR;
+  // @LOCALMOD - unsigned rather than X86ISD::NodeType so Opcode can override it
+  unsigned CallType = LocalDynamic ? X86ISD::TLSBASEADDR
+                                   : X86ISD::TLSADDR;
+
+  // @LOCALMOD-START
+  // If Opcode was explicitly overridden, use it as the call type.
+  if (Opcode != ISD::DELETED_NODE)
+    CallType = Opcode;
+  // @LOCALMOD-END
 
   if (InFlag) {
     SDValue Ops[] = { Chain,  TGA, *InFlag };
-    Chain = DAG.getNode(Opcode, dl, NodeTys, Ops, 3); // @LOCALMOD
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
   } else {
     SDValue Ops[]  = { Chain, TGA };
-    Chain = DAG.getNode(Opcode, dl, NodeTys, Ops, 2); // @LOCALMOD
+    Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
   }
 
   // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@@ -7704,7 +7710,7 @@ LowerToTLSExecCall(GlobalAddressSDNode *GA, SelectionDAG &DAG,
 
   return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
                     X86::EAX, // PtrVT is 32-bit.
-                    TargetFlag, Opcode);
+                    TargetFlag, false, Opcode);
 }
 
 // @LOCALMOD-START
@@ -7726,7 +7732,7 @@ LowerToTLSNaCl64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
 
   return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
                     X86::EAX, // PtrVT is 32-bit.
-                    TargetFlag, Opcode);
+                    TargetFlag, false, Opcode);
 }
 // @LOCALMOD-END
 
@@ -7851,7 +7857,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
       case TLSModel::InitialExec:
       case TLSModel::LocalExec:
         // @LOCALMOD-START
-        if (llvm::TLSUseCall) {
+        if (llvm::TLSUseCall && Subtarget->isTargetNaCl()) {
           return LowerToTLSExecCall(GA, DAG, getPointerTy(), model,
                                     Subtarget->is64Bit());
         } else {
diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td
index 01be39a338..d86a4065a7 100644
--- a/lib/Target/X86/X86InstrArithmetic.td
+++ b/lib/Target/X86/X86InstrArithmetic.td
@@ -32,7 +32,6 @@ def LEA64_32r : I<0x8D, MRMSrcMem,
                   [(set GR32:$dst, lea64_32addr:$src)], IIC_LEA>,
                   Requires<[In64BitMode]>;
 
-// @LOCALMOD (lea64mem)
 let isReMaterializable = 1 in
 def LEA64r   : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
                   "lea{q}\t{$src|$dst}, {$dst|$src}",
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 31aa25e3f2..241d2cab2c 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -533,13 +533,6 @@ def i64i8imm   : Operand<i64> {
   let OperandType = "OPERAND_IMMEDIATE";
 }
 
-// @LOCALMOD
-def lea64mem : Operand<i64> {
-  let PrintMethod = "printi64mem";
-  let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
 def lea64_32mem : Operand<i32> {
   let PrintMethod = "printi32mem";
   let AsmOperandLowerMethod = "lower_lea64_32mem";
@@ -562,8 +555,7 @@ def lea64mem : Operand<i64> {
 // Define X86 specific addressing mode.
 def addr      : ComplexPattern<iPTR, 5, "SelectAddr", [], [SDNPWantParent]>;
 def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
-                               [add, sub, mul, X86mul_imm, shl, or, frameindex,
-                               X86WrapperRIP], // @LOCALMOD
+                               [add, sub, mul, X86mul_imm, shl, or, frameindex],
                                []>;
 // In 64-bit mode 32-bit LEAs can use RIP-relative addressing.
 def lea64_32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a9a6579c47..dca129a3ea 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -45,8 +45,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, StringRef TT,
                "n8:16:32-S32" :
                getSubtargetImpl()->isTargetNaCl() ? // @LOCALMOD
                "e-p:32:32-s:32-f64:64:64-f32:32:32-f80:128:128-i64:64:64-n8:16:32-S128" :
-               "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-"
-               "n8:16:32-S128"),
+               "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32-S128"),
     InstrInfo(*this),
     TLInfo(*this),
     TSInfo(*this),
@@ -63,9 +62,11 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, StringRef TT,
   : X86TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true),
     DL(getSubtargetImpl()->isTargetNaCl() ? // @LOCALMOD
                "e-p:32:32-s:64-f64:64:64-f32:32:32-f80:128:128-i64:64:64-"
-               "n8:16:32:64-S128" :
-               "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
-               "n8:16:32:64-S128"),
+               "n8:16:32:64-S128" : (getSubtargetImpl()->isTarget64BitILP32() ?
+                    "e-p:32:32-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+                    "n8:16:32:64-S128" :
+                    "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-"
+                    "n8:16:32:64-S128")),
     InstrInfo(*this),
     TLInfo(*this),
     TSInfo(*this),
@@ -156,6 +157,7 @@ public:
   }
 
   virtual bool addInstSelector();
+  virtual bool addILPOpts();
   virtual bool addPreRegAlloc();
   virtual bool addPostRegAlloc();
   virtual bool addPreEmitPass();
diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp
index 065226fedd..929b2e0a15 100644
--- a/lib/Transforms/NaCl/ExpandTls.cpp
+++ b/lib/Transforms/NaCl/ExpandTls.cpp
@@ -239,9 +239,8 @@ static void rewriteTlsVars(Module &M, std::vector<VarInfo> *TlsVars,
   AttrBuilder B;
   B.addAttribute(Attribute::ReadOnly);
   B.addAttribute(Attribute::NoUnwind);
-  AttributeSet ReadTpAttrs = AttributeSet().addAttr(
-      M.getContext(), AttributeSet::FunctionIndex,
-      Attribute::get(M.getContext(), B));
+  AttributeSet ReadTpAttrs = AttributeSet::get(
+      M.getContext(), AttributeSet::FunctionIndex, B);
   Constant *ReadTpFunc = M.getOrInsertTargetIntrinsic("llvm.nacl.read.tp",
                                                       ReadTpType,
                                                       ReadTpAttrs);
diff --git a/lib/Transforms/Scalar/NaClCcRewrite.cpp b/lib/Transforms/Scalar/NaClCcRewrite.cpp
deleted file mode 100644
index 72a8e7e358..0000000000
--- a/lib/Transforms/Scalar/NaClCcRewrite.cpp
+++ /dev/null
@@ -1,1053 +0,0 @@
-//===- ConstantProp.cpp - Code to perform Simple Constant Propagation -----===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements calling convention rewrite for Native Client to ensure
-// compatibility between pnacl and gcc generated code when calling
-// ppapi interface functions.
-//===----------------------------------------------------------------------===//
-
-
-// Major TODOs:
-// * dealing with vararg
-//   (We shoulf exclude all var arg functions and calls to them from rewrites)
-
-#define DEBUG_TYPE "naclcc"
-
-#include "llvm/Pass.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/IR/Constant.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Target/TargetLibraryInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Transforms/Scalar.h"
-
-#include <vector>
-
-using namespace llvm;
-
-namespace llvm {
-
-cl::opt<bool> FlagEnableCcRewrite(
-  "nacl-cc-rewrite",
-  cl::desc("enable NaCl CC rewrite"));
-}
-
-namespace {
-
-// This represents a rule for rewiriting types
-struct TypeRewriteRule {
-  const char* src;    // type pattern we are trying to match
-  const char* dst;    // replacement type
-  const char* name;   // name of the rule for diagnosis
-};
-
-// Note: all rules must be well-formed
-// * parentheses must match
-// * TODO: add verification for this
-
-// Legend:
-// s(): struct (also used for unions)
-// c:   char (= 8 bit int)  (only allowed for src)
-// i:   32 bit int
-// l:   64 bit int
-// f:   32 bit float
-// d:   64 bit float (= double)
-// p:   untyped pointer (only allowed for src)
-// P(): typed pointer (currently not used, only allowed for src)
-// F:   generic function type (only allowed for src)
-
-// The X8664 Rewrite rules are also subject to
-// register constraints, c.f.: section 3.2.3
-// http://www.x86-64.org/documentation/abi.pdf
-// (roughly) for X8664: up to 2 regs per struct can be used for struct passsing
-//                      and up to 2 regs for struct returns
-// The rewrite rules are straight forward except for: s(iis(d)) => ll
-// which would be straight forward if the frontend had lowered the union inside
-// of PP_Var to s(l) instead of s(d), yielding: s(iis(l)) => ll
-TypeRewriteRule ByvalRulesX8664[] = {
-  {"s(iis(d))", "ll", "PP_Var"},
-  {"s(pp)",     "l",  "PP_ArrayOutput"},
-  {"s(ppi)",    "li", "PP_CompletionCallback"},
-  {0, 0, 0},
-};
-
-TypeRewriteRule SretRulesX8664[] = {
-  // Note: for srets, multireg returns are modeled as struct returns
-  {"s(iis(d))", "s(ll)", "PP_Var"},
-  {"s(ff)",     "d",     "PP_FloatPoint"},
-  {"s(ii)",     "l",     "PP_Point" },
-  {"s(pp)",     "l",     "PP_ArrayOutput"},
-  {0, 0, 0},
-};
-
-// for ARM: up to 4 regs can be used for struct passsing
-//          and up to 2 float regs for struct returns
-TypeRewriteRule ByvalRulesARM[] = {
-  {"s(iis(d))",  "ll",  "PP_Var"},
-  {"s(ppi)",     "iii", "PP_CompletionCallback" },
-  {"s(pp)",      "ii",  "PP_ArrayOutput"},
-  {0, 0, 0},
-};
-
-TypeRewriteRule SretRulesARM[] = {
-  // Note: for srets, multireg returns are modeled as struct returns
-  {"s(ff)",     "s(ff)", "PP_FloatPoint"},
-  {0, 0, 0},
-};
-
-// Helper class to model Register Usage as required by
-// the x86-64 calling conventions
-class RegUse {
-  uint32_t n_int_;
-  uint32_t n_float_;
-
- public:
-  RegUse(uint32_t n_int=0, uint32_t n_float=0) :
-    n_int_(n_int), n_float_(n_float) {}
-
-  static RegUse OneIntReg() { return RegUse(1, 0); }
-  static RegUse OnePointerReg() { return RegUse(1, 0); }
-  static RegUse OneFloatReg() { return RegUse(0, 1); }
-
-  RegUse operator+(RegUse other) const {
-    return RegUse(n_int_ + other.n_int_, n_float_ + other.n_float_); }
-  RegUse operator-(RegUse other) const {
-    return RegUse(n_int_ - other.n_int_, n_float_ - other.n_float_); }
-  bool operator==(RegUse other) const {
-    return n_int_ == other.n_int_ &&  n_float_ == other.n_float_; }
-  bool operator!=(RegUse other) const {
-    return n_int_ != other.n_int_ &&  n_float_ != other.n_float_; }
-  bool operator<=(RegUse other) const {
-    return n_int_ <= other.n_int_ &&  n_float_ <= other.n_float_; }
-  bool operator<(RegUse other) const {
-    return n_int_ < other.n_int_ &&  n_float_ < other.n_float_; }
-  bool operator>=(RegUse other) const {
-    return n_int_ >= other.n_int_ &&  n_float_ >= other.n_float_; }
-  bool operator>(RegUse other) const {
-    return n_int_ > other.n_int_ &&  n_float_ > other.n_float_; }
-  RegUse& operator+=(const RegUse& other) {
-    n_int_ += other.n_int_; n_float_ += other.n_float_; return *this;}
-  RegUse& operator-=(const RegUse& other) {
-    n_int_ -= other.n_int_; n_float_ -= other.n_float_; return *this;}
-
-  friend raw_ostream& operator<<(raw_ostream &O, const RegUse& reg);
-};
-
-raw_ostream& operator<<(raw_ostream &O, const RegUse& reg) {
-  O << "(" << reg.n_int_ << ", " << reg.n_float_ << ")";
-  return O;
-}
-
-// TODO: Find a better way to determine the architecture
-const TypeRewriteRule* GetByvalRewriteRulesForTarget(
-  const TargetLowering* tli) {
-  if (!FlagEnableCcRewrite) return 0;
-
-  const TargetMachine &m = tli->getTargetMachine();
-  const StringRef triple = m.getTargetTriple();
-
-  if (0 == triple.find("x86_64"))  return ByvalRulesX8664;
-  if (0 == triple.find("i686")) return 0;
-  if (0 == triple.find("armv7a")) return ByvalRulesARM;
-
-  llvm_unreachable("Unknown arch");
-  return 0;
-}
-
-// TODO: Find a better way to determine the architecture
-const TypeRewriteRule* GetSretRewriteRulesForTarget(
-  const TargetLowering* tli) {
-  if (!FlagEnableCcRewrite) return 0;
-
-  const TargetMachine &m = tli->getTargetMachine();
-  const StringRef triple = m.getTargetTriple();
-
-  if (0 == triple.find("x86_64"))  return SretRulesX8664;
-  if (0 == triple.find("i686")) return 0;
-  if (0 == triple.find("armv7a")) return SretRulesARM;
-
-  llvm_unreachable("Unknown arch");
-  return 0;
-}
-
-// TODO: Find a better way to determine the architecture
-// Describes the number of registers available for function
-// argument passing which may affect rewrite decisions on
-// some platforms.
-RegUse GetAvailableRegsForTarget(
-  const TargetLowering* tli) {
-  if (!FlagEnableCcRewrite) return RegUse(0, 0);
-
-  const TargetMachine &m = tli->getTargetMachine();
-  const StringRef triple = m.getTargetTriple();
-
-  // integer: RDI, RSI, RDX, RCX, R8, R9
-  // float XMM0, ..., XMM7
-  if (0 == triple.find("x86_64"))  return RegUse(6, 8);
-  // unused
-  if (0 == triple.find("i686")) return RegUse(0, 0);
-  // no constraints enforced here - the backend handles all the details
-  uint32_t max = std::numeric_limits<uint32_t>::max();
-  if (0 == triple.find("armv7a")) return RegUse(max, max);
-
-  llvm_unreachable("Unknown arch");
-  return 0;
-}
-
-// This class represents the a bitcode rewrite pass which ensures
-// that all ppapi interfaces are calling convention compatible
-// with gcc. This pass is archtitecture dependent.
-struct NaClCcRewrite : public FunctionPass {
-  static char ID; // Pass identification, replacement for typeid
-  const TypeRewriteRule* SretRewriteRules;
-  const TypeRewriteRule* ByvalRewriteRules;
-  const RegUse AvailableRegs;
-
-  explicit NaClCcRewrite(const TargetLowering *tli = 0)
-    : FunctionPass(ID),
-      SretRewriteRules(GetSretRewriteRulesForTarget(tli)),
-      ByvalRewriteRules(GetByvalRewriteRulesForTarget(tli)),
-      AvailableRegs(GetAvailableRegsForTarget(tli)) {
-    initializeNaClCcRewritePass(*PassRegistry::getPassRegistry());
-  }
-
-  // main pass entry point
-  bool runOnFunction(Function &F);
-
- private:
-  void RewriteCallsite(Instruction* call, LLVMContext& C);
-  void RewriteFunctionPrologAndEpilog(Function& F);
-};
-
-char NaClCcRewrite::ID = 0;
-
-// This is only used for dst side of rules
-Type* GetElementaryType(char c, LLVMContext& C) {
-  switch (c) {
-   case 'i':
-    return Type::getInt32Ty(C);
-   case 'l':
-    return Type::getInt64Ty(C);
-   case 'd':
-    return Type::getDoubleTy(C);
-   case 'f':
-    return Type::getFloatTy(C);
-   default:
-    dbgs() << c << "\n";
-    llvm_unreachable("Unknown type specifier");
-    return 0;
-  }
-}
-
-// This is only used for the dst side of a rule
-int GetElementaryTypeWidth(char c) {
-  switch (c) {
-   case 'i':
-   case 'f':
-    return 4;
-   case 'l':
-   case 'd':
-    return 8;
-   default:
-    llvm_unreachable("Unknown type specifier");
-    return 0;
-  }
-}
-
-// Check whether a type matches the *src* side pattern of a rewrite rule.
-// Note that the pattern parameter is updated during the recursion
-bool HasRewriteType(const Type* type, const char*& pattern) {
-  switch (*pattern++) {
-   case '\0':
-    return false;
-   case ')':
-    return false;
-   case 's':   // struct and union are currently no distinguished
-    {
-      if (*pattern++ != '(')  llvm_unreachable("malformed type pattern");
-      if (!type->isStructTy()) return false;
-      // check struct members
-      const StructType* st = cast<StructType>(type);
-      for (StructType::element_iterator it = st->element_begin(),
-                                        end = st->element_end();
-           it != end;
-           ++it) {
-        if (!HasRewriteType(*it, pattern)) return false;
-      }
-      // ensure we reached the end
-      int c = *pattern++;
-      return c == ')';
-    }
-    break;
-   case 'c':
-    return type->isIntegerTy(8);
-   case 'i':
-    return type->isIntegerTy(32);
-   case 'l':
-    return type->isIntegerTy(64);
-   case 'd':
-    return type->isDoubleTy();
-   case 'f':
-    return type->isFloatTy();
-   case 'F':
-    return type->isFunctionTy();
-   case 'p':  // untyped pointer
-    return type->isPointerTy();
-   case 'P':  // typed pointer
-    {
-      if (*pattern++ != '(')  llvm_unreachable("malformed type pattern");
-      if (!type->isPointerTy()) return false;
-      Type* pointee = dyn_cast<PointerType>(type)->getElementType();
-      if (!HasRewriteType(pointee, pattern)) return false;
-      int c = *pattern++;
-      return c == ')';
-    }
-   default:
-    llvm_unreachable("Unknown type specifier");
-    return false;
-  }
-}
-
-RegUse RegUseForRewriteRule(const TypeRewriteRule* rule) {
-  const char* pattern = std::string("C") == rule->dst ? rule->src : rule->dst;
-  RegUse result(0, 0);
-  while (char c = *pattern++) {
-    // Note, we only support a subset here, complex types (s, P)
-    // would require more work
-    switch (c) {
-     case 'i':
-     case 'l':
-      result += RegUse::OneIntReg();
-      break;
-     case 'd':
-     case 'f':
-      result += RegUse::OneFloatReg();
-      break;
-     default:
-      dbgs() << c << "\n";
-      llvm_unreachable("unexpected return type");
-    }
-  }
-  return result;
-}
-
-// Note, this only has to be accurate for x86-64 and is intentionally
-// quite strict so that we know when to add support for new types.
-// Ideally, unexpected types would be flagged by a bitcode checker.
-RegUse RegUseForType(const Type* t) {
- if (t->isPointerTy()) {
-   return RegUse::OnePointerReg();
- } else if (t->isFloatTy() || t->isDoubleTy()) {
-   return RegUse::OneFloatReg();
- } else if (t->isIntegerTy()) {
-   const IntegerType* it = dyn_cast<const IntegerType>(t);
-   unsigned width = it->getBitWidth();
-   // x86-64 assumption here - use "register info" to make this better
-   if (width <= 64) return RegUse::OneIntReg();
- }
-
- dbgs() << *const_cast<Type*>(t) << "\n";
- llvm_unreachable("unexpected type in RegUseForType");
-}
-
-// Match a type against a set of rewrite rules.
-// Return the matching rule, if any.
-const TypeRewriteRule* MatchRewriteRules(
-  const Type* type, const TypeRewriteRule* rules) {
-  if (rules == 0) return 0;
-  for (; rules->name != 0; ++rules) {
-    const char* pattern = rules->src;
-    if (HasRewriteType(type, pattern)) return rules;
-  }
-  return 0;
-}
-
-// Same as MatchRewriteRules but "dereference" type first.
-const TypeRewriteRule* MatchRewriteRulesPointee(const Type* t,
-                                                const TypeRewriteRule* Rules) {
-  // sret and byval are both modelled as pointers
-  const PointerType* pointer = dyn_cast<PointerType>(t);
-  if (pointer == 0) return 0;
-
-  return MatchRewriteRules(pointer->getElementType(), Rules);
-}
-
-// Note, the attributes are not part of the type but are stored
-// with the CallInst and/or the Function (if any)
-Type* CreateFunctionPointerType(Type* result_type,
-                                std::vector<Type*>& arguments) {
-  FunctionType* ft = FunctionType::get(result_type,
-                                       arguments,
-                                       false);
-  return PointerType::getUnqual(ft);
-}
-
-// Determines whether a function body needs a rewrite
-bool FunctionNeedsRewrite(const Function* fun,
-                          const TypeRewriteRule* ByvalRewriteRules,
-                          const TypeRewriteRule* SretRewriteRules,
-                          RegUse available) {
-  // TODO: can this be detected on indirect callsites as well.
-  //       if we skip the rewrite for the function body
-  //       we also need to skip it at the callsites
-  // if (F.isVarArg()) return false;
-
-  // Vectors and Arrays are not supported for compatibility
-  for (Function::const_arg_iterator AI = fun->arg_begin(), AE = fun->arg_end();
-       AI != AE;
-       ++AI) {
-    const Type* t = AI->getType();
-    if (isa<VectorType>(t) || isa<ArrayType>(t)) return false;
-  }
-
-  for (Function::const_arg_iterator AI = fun->arg_begin(), AE = fun->arg_end();
-       AI != AE;
-       ++AI) {
-    const Argument& a = *AI;
-    const Type* t = a.getType();
-    // byval and srets are modelled as pointers (to structs)
-    if (t->isPointerTy()) {
-      Type* pointee = dyn_cast<PointerType>(t)->getElementType();
-
-      if (ByvalRewriteRules && a.hasByValAttr()) {
-        const TypeRewriteRule* rule =
-          MatchRewriteRules(pointee, ByvalRewriteRules);
-        if (rule != 0 && RegUseForRewriteRule(rule) <= available) {
-          return true;
-        }
-      } else if (SretRewriteRules && a.hasStructRetAttr()) {
-        if (0 != MatchRewriteRules(pointee, SretRewriteRules)) {
-          return true;
-        }
-      }
-    }
-    available -= RegUseForType(t);
-  }
-  return false;
-}
-
-// Used for sret rewrites to determine the new function result type
-Type* GetNewReturnType(Type* type,
-                       const TypeRewriteRule* rule,
-                       LLVMContext& C) {
-  if (std::string("l") == rule->dst ||
-      std::string("d") == rule->dst) {
-    return GetElementaryType(rule->dst[0], C);
-  } else if (rule->dst[0] == 's') {
-    const char* cp = rule->dst + 2; // skip 's('
-    std::vector<Type*> fields;
-    while (*cp != ')') {
-      fields.push_back(GetElementaryType(*cp, C));
-      ++cp;
-    }
-    return StructType::get(C, fields, false /* isPacked */);
-  } else {
-    dbgs() << *type << " " << rule->name << "\n";
-    llvm_unreachable("unexpected return type");
-    return 0;
-  }
-}
-
-// Rewrite sret parameter while rewriting a function
-Type* RewriteFunctionSret(Function& F,
-                          Value* orig_val,
-                          const TypeRewriteRule* rule) {
-  LLVMContext& C = F.getContext();
-  BasicBlock& entry = F.getEntryBlock();
-  Instruction* before = &(entry.front());
-  Type* old_type = orig_val->getType();
-  Type* old_pointee = dyn_cast<PointerType>(old_type)->getElementType();
-  Type* new_type = GetNewReturnType(old_type, rule, C);
-  // create a temporary to hold the return value as we no longer pass
-  // in the pointer
-  AllocaInst* tmp_ret = new AllocaInst(old_pointee, "result", before);
-  orig_val->replaceAllUsesWith(tmp_ret);
-  CastInst* cast_ret = CastInst::CreatePointerCast(
-    tmp_ret,
-    PointerType::getUnqual(new_type),
-    "byval_cast",
-    before);
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end();
-         II != IE;
-         /* see below */) {
-      Instruction* inst = II;
-      // we do decontructive magic below, so advance the iterator here
-      // (this is still a little iffy)
-      ++II;
-      ReturnInst* ret = dyn_cast<ReturnInst>(inst);
-      if (ret) {
-        if (ret->getReturnValue() != 0)
-          llvm_unreachable("expected a void return");
-        // load the return value from temporary
-        Value *ret_val = new LoadInst(cast_ret, "load_result", ret);
-        // return that loaded value and delete the return instruction
-        ReturnInst::Create(C, ret_val, ret);
-        ret->eraseFromParent();
-      }
-    }
-  }
-  return new_type;
-}
-
-// Rewrite one byval function parameter while rewriting a function
-void FixFunctionByvalsParameter(Function& F,
-                                std::vector<Argument*>& new_arguments,
-                                std::vector<Attribute>& new_attributes,
-                                Value* byval,
-                                const TypeRewriteRule* rule) {
-  LLVMContext& C = F.getContext();
-  BasicBlock& entry = F.getEntryBlock();
-  Instruction* before = &(entry.front());
-  Twine prefix =  byval->getName() + "_split";
-  Type* t = byval->getType();
-  Type* pointee = dyn_cast<PointerType>(t)->getElementType();
-  AllocaInst* tmp_param = new AllocaInst(pointee, prefix + "_param", before);
-  byval->replaceAllUsesWith(tmp_param);
-  // convert byval poiner to char pointer
-  Value* base = CastInst::CreatePointerCast(
-    tmp_param, PointerType::getInt8PtrTy(C), prefix + "_base", before);
-
-  int width = 0;
-  const char* pattern = rule->dst;
-  for (int offset = 0; *pattern; ++pattern, offset += width) {
-    width = GetElementaryTypeWidth(*pattern);
-    Type* t = GetElementaryType(*pattern, C);
-    Argument* arg = new Argument(t, prefix, &F);
-    Type* pt = PointerType::getUnqual(t);
-    // the code below generates something like:
-    // <CHAR-PTR> = getelementptr i8* <BASE>, i32 <OFFSET-FROM-BASE>
-    // <PTR> = bitcast i8* <CHAR-PTR> to <TYPE>*
-    // store <ARG> <TYPE>* <ELEM-PTR>
-    ConstantInt* baseOffset = ConstantInt::get(Type::getInt32Ty(C), offset);
-    Value *v;
-    v = GetElementPtrInst::Create(base, baseOffset, prefix + "_base_add", before);
-    v = CastInst::CreatePointerCast(v, pt, prefix + "_cast", before);
-    v = new StoreInst(arg, v, before);
-
-    new_arguments.push_back(arg);
-    new_attributes.push_back(Attribute());
-  }
-}
-
-// Change function signature to reflect all the rewrites.
-// This includes function type/signature and attributes.
-void UpdateFunctionSignature(Function &F,
-                             Type* new_result_type,
-                             std::vector<Argument*>& new_arguments,
-                             std::vector<Attribute>& new_attributes) {
-  DEBUG(dbgs() << "PHASE PROTOTYPE UPDATE\n");
-  if (new_result_type) {
-    DEBUG(dbgs() << "NEW RESULT TYPE: " << *new_result_type << "\n");
-  }
-  // Update function type
-  FunctionType* old_fun_type = F.getFunctionType();
-  std::vector<Type*> new_types;
-  for (size_t i = 0; i < new_arguments.size(); ++i) {
-    new_types.push_back(new_arguments[i]->getType());
-  }
-
-  FunctionType* new_fun_type = FunctionType::get(
-    new_result_type ? new_result_type : old_fun_type->getReturnType(),
-    new_types,
-    false);
-  F.setType(PointerType::getUnqual(new_fun_type));
-
-  Function::ArgumentListType& args = F.getArgumentList();
-  DEBUG(dbgs() << "PHASE ARGUMENT DEL " <<  args.size() << "\n");
-  while (args.size()) {
-    Argument* arg = args.begin();
-    DEBUG(dbgs() << "DEL " << arg->getArgNo() << " " << arg->getName() << "\n");
-    args.remove(args.begin());
-  }
-
-  DEBUG(dbgs() << "PHASE ARGUMENT ADD " <<  new_arguments.size()   << "\n");
-  for (size_t i = 0; i < new_arguments.size(); ++i) {
-    Argument* arg = new_arguments[i];
-    DEBUG(dbgs() << "ADD " << i << " " << arg->getName() << "\n");
-    args.push_back(arg);
-  }
-
-  DEBUG(dbgs() << "PHASE ATTRIBUTES UPDATE\n");
-  std::vector<AttributeWithIndex> new_attributes_vec;
-  for (size_t i = 0; i < new_attributes.size(); ++i) {
-    Attribute attr = new_attributes[i];
-    if (attr.hasAttributes()) {
-      new_attributes_vec.push_back(AttributeWithIndex::get(i + 1, attr));
-    }
-  }
-  Attribute fattr = F.getAttributes().getFnAttributes();
-  if (fattr.hasAttributes())
-    new_attributes_vec.push_back(AttributeWithIndex::get(~0, fattr));
-  F.setAttributes(AttributeSet::get(F.getContext(), new_attributes_vec));
-}
-
-
-void ExtractFunctionArgsAndAttributes(Function& F,
-                                      std::vector<Argument*>& old_arguments,
-                                      std::vector<Attribute>& old_attributes) {
-  for (Function::arg_iterator ai = F.arg_begin(),
-                             end = F.arg_end();
-       ai != end;
-       ++ai) {
-    old_arguments.push_back(ai);
-  }
-
-  for (size_t i = 0; i < old_arguments.size(); ++i) {
-    // index zero is for return value attributes
-    old_attributes.push_back(F.getAttributes().getParamAttributes(i + 1));
-  }
-}
-
-// Apply byval or sret rewrites to function body.
-void NaClCcRewrite::RewriteFunctionPrologAndEpilog(Function& F) {
-
-  DEBUG(dbgs() << "\nFUNCTION-REWRITE\n");
-
-  DEBUG(dbgs() << "FUNCTION BEFORE ");
-  DEBUG(dbgs() << F);
-  DEBUG(dbgs() << "\n");
-
-  std::vector<Argument*> new_arguments;
-  std::vector<Attribute> new_attributes;
-  std::vector<Argument*> old_arguments;
-  std::vector<Attribute> old_attributes;
-
-
-  // make a copy of everything first as create Argument adds them to the list
-  ExtractFunctionArgsAndAttributes(F, old_arguments, old_attributes);
-
-  // A non-zero new_result_type indicates an sret rewrite
-  Type* new_result_type = 0;
-
-  // only the first arg can be "sret"
-  if (old_attributes.size() > 0 && old_attributes[0].hasAttribute(Attribute::StructRet)) {
-    const TypeRewriteRule* sret_rule =
-      MatchRewriteRulesPointee(old_arguments[0]->getType(), SretRewriteRules);
-    if (sret_rule) {
-      Argument* arg = old_arguments[0];
-      DEBUG(dbgs() << "REWRITING SRET "
-            << " arg " << arg->getName() << " " << sret_rule->name << "\n");
-      new_result_type = RewriteFunctionSret(F, arg, sret_rule);
-      old_arguments.erase(old_arguments.begin());
-      old_attributes.erase(old_attributes.begin());
-    }
-  }
-
-  // now deal with the byval arguments
-  RegUse available = AvailableRegs;
-  for (size_t i = 0; i < old_arguments.size(); ++i) {
-    Argument* arg = old_arguments[i];
-    Type* t = arg->getType();
-    Attribute attr = old_attributes[i];
-    if (attr.hasAttribute(Attribute::ByVal)) {
-      const TypeRewriteRule* rule =
-        MatchRewriteRulesPointee(t, ByvalRewriteRules);
-      if (rule != 0 && RegUseForRewriteRule(rule) <= available) {
-        DEBUG(dbgs() << "REWRITING BYVAL "
-              << *t << " arg " << arg->getName() << " " << rule->name << "\n");
-        FixFunctionByvalsParameter(F,
-                                   new_arguments,
-                                   new_attributes,
-                                   arg,
-                                   rule);
-        available -= RegUseForRewriteRule(rule);
-        continue;
-      }
-    }
-
-    // fall through case - no rewrite is happening
-    new_arguments.push_back(arg);
-    new_attributes.push_back(attr);
-    available -= RegUseForType(t);
-  }
-
-  UpdateFunctionSignature(F, new_result_type, new_arguments, new_attributes);
-
-  DEBUG(dbgs() << "FUNCTION AFTER ");
-  DEBUG(dbgs() << F);
-  DEBUG(dbgs() << "\n");
-}
-
-// used for T in {CallInst, InvokeInst}
-// TODO(robertm): try unifying this code with FunctionNeedsRewrite()
-template<class T> bool CallNeedsRewrite(
-  const Instruction* inst,
-  const TypeRewriteRule* ByvalRewriteRules,
-  const TypeRewriteRule* SretRewriteRules,
-  RegUse available) {
-
-  const T* call = cast<T>(inst);
-  // skip non parameter operands at the end
-  size_t num_params = call->getNumOperands() - (isa<CallInst>(inst) ? 1 : 3);
-
-  // Vectors and Arrays are not supported for compatibility
-  for (size_t i = 0; i <  num_params; ++i) {
-    Type* t = call->getOperand(i)->getType();
-    if (isa<VectorType>(t) || isa<ArrayType>(t)) return false;
-  }
-
-  for (size_t i = 0; i <  num_params; ++i) {
-    Type* t = call->getOperand(i)->getType();
-    // byval and srets are modelled as pointers (to structs)
-    if (t->isPointerTy()) {
-      Type* pointee = dyn_cast<PointerType>(t)->getElementType();
-
-      //  param zero is for the return value
-      if (ByvalRewriteRules && call->paramHasAttr(i + 1, Attribute::ByVal)) {
-        const TypeRewriteRule* rule =
-          MatchRewriteRules(pointee, ByvalRewriteRules);
-        if (rule != 0 && RegUseForRewriteRule(rule) <= available) {
-          return true;
-        }
-      } else if (SretRewriteRules &&
-                 call->paramHasAttr(i + 1, Attribute::StructRet)) {
-        if (0 != MatchRewriteRules(pointee, SretRewriteRules)) {
-          return true;
-        }
-      }
-    }
-    available -= RegUseForType(t);
-  }
-  return false;
-}
-
-// This code will load the fields of the byval ptr into scalar variables
-// which will then be used as argument when we rewrite the actual call
-// instruction.
-void PrependCompensationForByvals(std::vector<Value*>& new_operands,
-                                  std::vector<Attribute>& new_attributes,
-                                  Instruction* call,
-                                  Value* byval,
-                                  const TypeRewriteRule* rule,
-                                  LLVMContext& C) {
-  // convert byval poiner to char pointer
-  Value* base = CastInst::CreatePointerCast(
-    byval, PointerType::getInt8PtrTy(C), "byval_base", call);
-
-  int width = 0;
-  const char* pattern = rule->dst;
-  for (int offset = 0; *pattern; ++pattern, offset += width) {
-    width = GetElementaryTypeWidth(*pattern);
-    Type* t = GetElementaryType(*pattern, C);
-    Type* pt = PointerType::getUnqual(t);
-    // the code below generates something like:
-    // <CHAR-PTR> = getelementptr i8* <BASE>, i32 <OFFSET-FROM-BASE>
-    // <PTR> = bitcast i8* <CHAR-PTR> to i32*
-    // <SCALAR> = load i32* <ELEM-PTR>
-    ConstantInt* baseOffset = ConstantInt::get(Type::getInt32Ty(C), offset);
-    Value* v;
-    v = GetElementPtrInst::Create(base, baseOffset, "byval_base_add", call);
-    v = CastInst::CreatePointerCast(v, pt, "byval_cast", call);
-    v = new LoadInst(v, "byval_extract", call);
-
-    new_operands.push_back(v);
-    new_attributes.push_back(Attribute());
-  }
-}
-
-// Note: this will only be called if we expect a rewrite to occur
-void CallsiteFixupSrets(Instruction* call,
-                        Value* sret,
-                        Type* new_type,
-                        const TypeRewriteRule* rule) {
-  const char* pattern = rule->dst;
-  Instruction* next;
-  if (isa<CallInst>(call)) {
-    next = call->getNextNode();
-  } else if (isa<InvokeInst>(call)) {
-    // if this scheme turns out to be too simplistic (i.e. asserts fire)
-    // we need to introduce a new basic block for the compensation code.
-    BasicBlock* normal = dyn_cast<InvokeInst>(call)->getNormalDest();
-    if (!normal->getSinglePredecessor()) {
-      llvm_unreachable("unexpected invoke normal bb");
-    }
-    next = normal->getFirstNonPHI();
-  } else {
-    llvm_unreachable("unexpected call instruction");
-  }
-
-  if (next == 0) {
-    llvm_unreachable("unexpected missing next instruction");
-  }
-
-  if (pattern[0] == 's' ||
-      std::string("l") == pattern ||
-      std::string("d") == pattern) {
-    Type* pt = PointerType::getUnqual(new_type);
-    Value* cast = CastInst::CreatePointerCast(sret, pt, "cast", next);
-    new StoreInst(call, cast, next);
-  } else {
-    dbgs() << rule->name << "\n";
-    llvm_unreachable("unexpected return type at fix up");
-  }
-}
-
-void ExtractOperandsAndAttributesFromCallInst(
-  CallInst* call,
-  std::vector<Value*>& operands,
-  std::vector<Attribute>& attributes) {
-
-  AttributeSet PAL = call->getAttributes();
-  // last operand is: function
-  for (size_t i = 0; i <  call->getNumOperands() - 1; ++i) {
-    operands.push_back(call->getArgOperand(i));
-    // index zero is for return value attributes
-    attributes.push_back(PAL.getParamAttributes(i + 1));
-  }
-}
-
-// Note: this differs from the one above in the loop bounds
-void ExtractOperandsAndAttributesFromeInvokeInst(
-  InvokeInst* call,
-  std::vector<Value*>& operands,
-  std::vector<Attribute>& attributes) {
-  AttributeSet PAL = call->getAttributes();
-  // last three operands are: function, bb-normal, bb-exception
-  for (size_t i = 0; i <  call->getNumOperands() - 3; ++i) {
-    operands.push_back(call->getArgOperand(i));
-    // index zero is for return value attributes
-    attributes.push_back(PAL.getParamAttributes(i + 1));
-  }
-}
-
-
-Instruction* ReplaceCallInst(CallInst* call,
-                             Type* function_pointer,
-                             std::vector<Value*>& new_operands,
-                             std::vector<Attribute>& new_attributes) {
-  Value* v = CastInst::CreatePointerCast(
-    call->getCalledValue(), function_pointer, "fp_cast", call);
-  CallInst* new_call = CallInst::Create(v, new_operands, "", call);
-  // NOTE: tail calls may be ruled out but byval/sret, should we assert this?
-  // TODO: did wid forget to clone anything else?
-  new_call->setTailCall(call->isTailCall());
-  new_call->setCallingConv(call->getCallingConv());
-  for (size_t i = 0; i < new_attributes.size(); ++i) {
-    // index zero is for return value attributes
-    new_call->addAttribute(i + 1, new_attributes[i]);
-  }
-  return new_call;
-}
-
-Instruction* ReplaceInvokeInst(InvokeInst* call,
-                             Type* function_pointer,
-                             std::vector<Value*>& new_operands,
-                             std::vector<Attribute>& new_attributes) {
-  Value* v = CastInst::CreatePointerCast(
-    call->getCalledValue(), function_pointer, "fp_cast", call);
-  InvokeInst* new_call = InvokeInst::Create(v,
-                                            call->getNormalDest(),
-                                            call->getUnwindDest(),
-                                            new_operands,
-                                            "",
-                                            call);
-  for (size_t i = 0; i < new_attributes.size(); ++i) {
-    // index zero is for return value attributes
-    new_call->addAttribute(i + 1, new_attributes[i]);
-  }
-  return new_call;
-}
-
-
-void NaClCcRewrite::RewriteCallsite(Instruction* call, LLVMContext& C) {
-  BasicBlock* BB = call->getParent();
-
-  DEBUG(dbgs() << "\nCALLSITE-REWRITE\n");
-  DEBUG(dbgs() << "CALLSITE BB BEFORE " << *BB);
-  DEBUG(dbgs() << "\n");
-  DEBUG(dbgs() << *call << "\n");
-  if (isa<InvokeInst>(call)) {
-    DEBUG(dbgs() << "\n" << *(dyn_cast<InvokeInst>(call)->getNormalDest()));
-  }
-
-  // new_result(_type) is only relevent if an sret is rewritten
-  // whish is indicated by sret_rule != 0
-  const TypeRewriteRule* sret_rule = 0;
-  Type* new_result_type = call->getType();
-  // This is the sret which was originally passed in as the first arg.
-  // After the rewrite we simply copy the function result into it.
-  Value* new_result = 0;
-
-  std::vector<Value*> old_operands;
-  std::vector<Attribute> old_attributes;
-  if (isa<CallInst>(call)) {
-    ExtractOperandsAndAttributesFromCallInst(
-      cast<CallInst>(call), old_operands, old_attributes);
-  } else if (isa<InvokeInst>(call)) {
-    ExtractOperandsAndAttributesFromeInvokeInst(
-      cast<InvokeInst>(call), old_operands, old_attributes);
-  } else {
-    llvm_unreachable("Unexpected instruction type");
-  }
-
-  // handle sret (just the book-keeping, 'new_result' is dealt with below)
-  // only the first arg can be "sret"
-  if (old_attributes[0].hasAttribute(Attribute::StructRet)) {
-    sret_rule = MatchRewriteRulesPointee(
-      old_operands[0]->getType(), SretRewriteRules);
-    if (sret_rule) {
-      new_result_type =
-        GetNewReturnType(old_operands[0]->getType(), sret_rule, C);
-      new_result = old_operands[0];
-      old_operands.erase(old_operands.begin());
-      old_attributes.erase(old_attributes.begin());
-    }
-  }
-
-  // handle byval
-  std::vector<Value*> new_operands;
-  std::vector<Attribute> new_attributes;
-  RegUse available = AvailableRegs;
-
-  for (size_t i = 0; i <  old_operands.size(); ++i) {
-    Value *operand = old_operands[i];
-    Type* t = operand->getType();
-    Attribute attr = old_attributes[i];
-
-    if (attr.hasAttribute(Attribute::ByVal)) {
-      const TypeRewriteRule* rule =
-        MatchRewriteRulesPointee(t, ByvalRewriteRules);
-      if (rule != 0 && RegUseForRewriteRule(rule) <= available) {
-        DEBUG(dbgs() << "REWRITING BYVAL "
-              << *t << " arg " << i << " " << rule->name << "\n");
-        PrependCompensationForByvals(new_operands,
-                                     new_attributes,
-                                     call,
-                                     operand,
-                                     rule,
-                                     C);
-        available -= RegUseForRewriteRule(rule);
-        continue;
-      }
-    }
-
-    // fall through case - no rewrite is happening
-    new_operands.push_back(operand);
-    new_attributes.push_back(attr);
-    available -= RegUseForType(t);
-  }
-
-  // Note, this code is tricky.
-  // Initially we used a much more elaborate scheme introducing
-  // new function declarations for direct calls.
-  // This simpler scheme, however, works for both direct and
-  // indirect calls
-  // We transform (here the direct case):
-  // call void @result_PP_FloatPoint(%struct.PP_FloatPoint* sret %sret)
-  // into
-  //  %fp_cast = bitcast void (%struct.PP_FloatPoint*)*
-  //                @result_PP_FloatPoint to %struct.PP_FloatPoint ()*
-  //  %result = call %struct.PP_FloatPoint %fp_cast()
-  //
-  std::vector<Type*> new_arg_types;
-  for (size_t i = 0; i < new_operands.size(); ++i) {
-    new_arg_types.push_back(new_operands[i]->getType());
-  }
-
-  DEBUG(dbgs() << "REWRITE CALL INSTRUCTION\n");
-  Instruction* new_call = 0;
-  if (isa<CallInst>(call)) {
-    new_call = ReplaceCallInst(
-      cast<CallInst>(call),
-      CreateFunctionPointerType(new_result_type, new_arg_types),
-      new_operands,
-      new_attributes);
-  } else if (isa<InvokeInst>(call)) {
-    new_call = ReplaceInvokeInst(
-      cast<InvokeInst>(call),
-      CreateFunctionPointerType(new_result_type, new_arg_types),
-      new_operands,
-      new_attributes);
-  } else {
-    llvm_unreachable("Unexpected instruction type");
-  }
-
-  // We prepended the new call, now get rid of the old one.
-  // If we did not change the return type, there may be consumers
-  // of the result which must be redirected.
-  if (!sret_rule) {
-    call->replaceAllUsesWith(new_call);
-  }
-  call->eraseFromParent();
-
-  // Add compensation codes for srets if necessary
-  if (sret_rule) {
-    DEBUG(dbgs() << "REWRITING  SRET " << sret_rule->name << "\n");
-    CallsiteFixupSrets(new_call, new_result, new_result_type, sret_rule);
-  }
-
-  DEBUG(dbgs() << "CALLSITE BB AFTER" << *BB);
-  DEBUG(dbgs() << "\n");
-  DEBUG(dbgs() << *new_call << "\n");
-  if (isa<InvokeInst>(call)) {
-    DEBUG(dbgs() << "\n" << *(dyn_cast<InvokeInst>(call)->getNormalDest()));
-  }
-}
-
-bool NaClCcRewrite::runOnFunction(Function &F) {
-  // No rules - no action
-  if (ByvalRewriteRules == 0 && SretRewriteRules == 0) return false;
-
-  bool Changed = false;
-
-  if (FunctionNeedsRewrite(&F, ByvalRewriteRules, SretRewriteRules, AvailableRegs)) {
-    DEBUG(dbgs() << "FUNCTION NEEDS REWRITE " << F.getName() << "\n");
-    RewriteFunctionPrologAndEpilog(F);
-    Changed = true;
-  }
-
-  // Find all the calls and invokes in F and rewrite them if necessary
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end();
-         II != IE;
-         /* II updated below */) {
-      Instruction* inst = II;
-      // we do decontructive magic below, so advance the iterator here
-      // (this is still a little iffy)
-      ++II;
-      if (isa<InvokeInst>(inst) || isa<CallInst>(inst))  {
-        // skip calls to llvm.dbg.declare, etc.
-        if (isa<IntrinsicInst>(inst)) continue;
-
-        if (isa<CallInst>(inst) &&
-            !CallNeedsRewrite<CallInst>
-            (inst, ByvalRewriteRules, SretRewriteRules, AvailableRegs)) continue;
-
-        if (isa<InvokeInst>(inst) &&
-            !CallNeedsRewrite<InvokeInst>
-            (inst, ByvalRewriteRules, SretRewriteRules, AvailableRegs)) continue;
-
-        RewriteCallsite(inst, F.getContext());
-        Changed = true;
-      }
-    }
-  }
-  return Changed;
-}
-
-} // end anonymous namespace
-
-
-INITIALIZE_PASS(NaClCcRewrite, "naclcc", "NaCl CC Rewriter", false, false)
-
-FunctionPass *llvm::createNaClCcRewritePass(const TargetLowering *tli) {
-  return new NaClCcRewrite(tli);
-}
diff --git a/test/NaCl/ARM/neon-vld1-sandboxing.ll b/test/NaCl/ARM/neon-vld1-sandboxing.ll
index bf3dc253a6..9ae7990371 100644
--- a/test/NaCl/ARM/neon-vld1-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vld1-sandboxing.ll
@@ -4,7 +4,7 @@
 define <8 x i8> @vld1i8(i8* %A) nounwind {
   %tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.8 {{{d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vld1.8 {{{d[0-9]+}}}, [r0:64]
   ret <8 x i8> %tmp1
 }
 
@@ -39,7 +39,7 @@ define <1 x i64> @vld1i64(i32 %foo, i32 %bar, i32 %baz,
 define <16 x i8> @vld1Qi8(i8* %A) nounwind {
   %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:64]
   ret <16 x i8> %tmp1
 }
 
@@ -47,7 +47,7 @@ define <8 x i16> @vld1Qi16(i16* %A) nounwind {
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %tmp0, i32 32)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.16 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128]
+; CHECK-NEXT:    vld1.16 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
   ret <8 x i16> %tmp1
 }
 
@@ -83,7 +83,7 @@ define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind {
   %A = load i8** %ptr
   %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8)
 ; CHECK:         bic r1, r1, #3221225472
-; CHECK-NEXT:    vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1, :64]!
+; CHECK-NEXT:    vld1.8 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1:64]!
   %tmp2 = getelementptr i8* %A, i32 16
   store i8* %tmp2, i8** %ptr
   ret <16 x i8> %tmp1
diff --git a/test/NaCl/ARM/neon-vld2-sandboxing.ll b/test/NaCl/ARM/neon-vld2-sandboxing.ll
index b67a9bf4d1..788fdb55da 100644
--- a/test/NaCl/ARM/neon-vld2-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vld2-sandboxing.ll
@@ -29,7 +29,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
   %tmp4 = add <8 x i8> %tmp2, %tmp3
 ; CHECK: bic      r0, r0, #3221225472
-; CHECK: vld2.8   {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+; CHECK: vld2.8   {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64]
   ret <8 x i8> %tmp4
 }
 
@@ -40,7 +40,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
   %tmp4 = add <4 x i16> %tmp2, %tmp3
 ; CHECK: bic      r0, r0, #3221225472
-; CHECK: vld2.16   {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+; CHECK: vld2.16   {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]
   ret <4 x i16> %tmp4
 }
 
@@ -61,7 +61,7 @@ define <16 x i8> @vld2Qi8(i8* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
   %tmp4 = add <16 x i8> %tmp2, %tmp3
 ; CHECK: bic      r0, r0, #3221225472
-; CHECK: vld2.8   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+; CHECK: vld2.8   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64]
   ret <16 x i8> %tmp4
 }
 
@@ -72,7 +72,7 @@ define <8 x i16> @vld2Qi16(i16* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
   %tmp4 = add <8 x i16> %tmp2, %tmp3
 ; CHECK: bic      r0, r0, #3221225472
-; CHECK: vld2.16   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+; CHECK: vld2.16   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]
   ret <8 x i16> %tmp4
 }
 
@@ -83,7 +83,7 @@ define <4 x i32> @vld2Qi32(i32* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
   %tmp4 = add <4 x i32> %tmp2, %tmp3
 ; CHECK: bic      r0, r0, #3221225472
-; CHECK: vld2.32   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :256]
+; CHECK: vld2.32   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:256]
   ret <4 x i32> %tmp4
 }
 
@@ -94,7 +94,7 @@ define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
   %tmp4 = add <16 x i8> %tmp2, %tmp3
 ; CHECK: bic      r2, r2, #3221225472
-; CHECK: vld2.8   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r2, :128], r1
+; CHECK: vld2.8   {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r2:128], r1
   %tmp5 = getelementptr i8* %A, i32 %inc
   store i8* %tmp5, i8** %ptr
   ret <16 x i8> %tmp4
diff --git a/test/NaCl/ARM/neon-vld3-sandboxing.ll b/test/NaCl/ARM/neon-vld3-sandboxing.ll
index 7fb8eb3077..5658b33d89 100644
--- a/test/NaCl/ARM/neon-vld3-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vld3-sandboxing.ll
@@ -29,7 +29,7 @@ define <8 x i8> @vld3i8(i32 %foobar, i32 %ba, i8* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp1, 2
   %tmp4 = add <8 x i8> %tmp2, %tmp3
 ; CHECK:         bic r2, r2, #3221225472
-; CHECK-NEXT:    vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2, :64]
+; CHECK-NEXT:    vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2:64]
   ret <8 x i8> %tmp4
 }
 
@@ -62,7 +62,7 @@ define <1 x i64> @vld3i64(i64* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int64x1x3_t %tmp1, 2
   %tmp4 = add <1 x i64> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:64]
   ret <1 x i64> %tmp4
 }
 
@@ -72,7 +72,7 @@ define <16 x i8> @vld3Qi8(i8* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x16x3_t %tmp1, 2
   %tmp4 = add <16 x i8> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]!
+; CHECK-NEXT:    vld3.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:64]!
   ret <16 x i8> %tmp4
 }
 
diff --git a/test/NaCl/ARM/neon-vld4-sandboxing.ll b/test/NaCl/ARM/neon-vld4-sandboxing.ll
index 570a3ce24c..74b0aafc7e 100644
--- a/test/NaCl/ARM/neon-vld4-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vld4-sandboxing.ll
@@ -29,7 +29,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
   %tmp4 = add <8 x i8> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:64]
   ret <8 x i8> %tmp4
 }
 
@@ -40,7 +40,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp1, 2
   %tmp4 = add <4 x i16> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :128]
+; CHECK-NEXT:    vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]
   ret <4 x i16> %tmp4
 }
 
@@ -51,7 +51,7 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
   %tmp4 = add <2 x i32> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+; CHECK-NEXT:    vld4.32 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256]
   ret <2 x i32> %tmp4
 }
 
@@ -62,7 +62,7 @@ define <1 x i64> @vld4i64(i64* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int64x1x4_t %tmp1, 2
   %tmp4 = add <1 x i64> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+; CHECK-NEXT:    vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256]
   ret <1 x i64> %tmp4
 }
 
@@ -72,9 +72,9 @@ define <16 x i8> @vld4Qi8(i8* %A) nounwind {
   %tmp3 = extractvalue %struct.__neon_int8x16x4_t %tmp1, 2
   %tmp4 = add <16 x i8> %tmp2, %tmp3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]!
+; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256]!
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0, :256]
+; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r0:256]
   ret <16 x i8> %tmp4
 }
 
@@ -82,7 +82,7 @@ define <8 x i8> @vld4i8_update(i8** %ptr, i32 %inc) nounwind {
   %A = load i8** %ptr
   %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8(i8* %A, i32 16)
 ; CHECK:         bic r2, r2, #3221225472
-; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2, :128], r1
+; CHECK-NEXT:    vld4.8 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r2:128], r1
   %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
   %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 2
   %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -96,7 +96,7 @@ define <8 x i16> @vld4Qi16_update(i16** %ptr) nounwind {
   %tmp0 = bitcast i16* %A to i8*
   %tmp1 = call %struct.__neon_int16x8x4_t @llvm.arm.neon.vld4.v8i16(i8* %tmp0, i32 8)
 ; CHECK:         bic r1, r1, #3221225472
-; CHECK-NEXT:    vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r1, :64]!
+; CHECK-NEXT:    vld4.16 {{{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}}, [r1:64]!
   %tmp2 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 0
   %tmp3 = extractvalue %struct.__neon_int16x8x4_t %tmp1, 2
   %tmp4 = add <8 x i16> %tmp2, %tmp3
diff --git a/test/NaCl/ARM/neon-vlddup-sandboxing.ll b/test/NaCl/ARM/neon-vlddup-sandboxing.ll
index 18e1b41de1..0ce51ad8cc 100644
--- a/test/NaCl/ARM/neon-vlddup-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vlddup-sandboxing.ll
@@ -36,7 +36,7 @@ define <4 x i16> @vld1dupi16(i16* %A) nounwind {
   %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
   %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.16 {{{d[0-9]+\[\]}}}, [r0, :16]
+; CHECK-NEXT:    vld1.16 {{{d[0-9]+\[\]}}}, [r0:16]
   ret <4 x i16> %tmp3
 }
 
@@ -45,7 +45,7 @@ define <2 x i32> @vld1dupi32(i32* %A) nounwind {
   %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
   %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.32 {{{d[0-9]+\[\]}}}, [r0, :32]
+; CHECK-NEXT:    vld1.32 {{{d[0-9]+\[\]}}}, [r0:32]
   ret <2 x i32> %tmp3
 }
 
@@ -85,7 +85,7 @@ define <4 x i16> @vld2dupi16(i8* %A) nounwind {
 define <2 x i32> @vld2dupi32(i8* %A) nounwind {
   %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld2.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64]
+; CHECK-NEXT:    vld2.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0:64]
   %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
   %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
@@ -112,7 +112,7 @@ define <4 x i16> @vld3dupi16(i8* %A) nounwind {
 define <2 x i32> @vld4dupi32(i8* %A) nounwind {
   %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0, :64]
+; CHECK-NEXT:    vld4.32 {{{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}, {{d[0-9]+\[\]}}}, [r0:64]
   %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
   %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
   %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
diff --git a/test/NaCl/ARM/neon-vldlane-sandboxing.ll b/test/NaCl/ARM/neon-vldlane-sandboxing.ll
index fbcef81ac9..9c890c7a61 100644
--- a/test/NaCl/ARM/neon-vldlane-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vldlane-sandboxing.ll
@@ -69,7 +69,7 @@ define <4 x i16> @vld1lanei16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp2 = load i16* %A, align 8
   %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 2
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+; CHECK-NEXT:    vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0:16]
   ret <4 x i16> %tmp3
 }
 
@@ -78,7 +78,7 @@ define <2 x i32> @vld1lanei32(i32* %A, <2 x i32>* %B) nounwind {
   %tmp2 = load i32* %A, align 8
   %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+; CHECK-NEXT:    vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0:32]
   ret <2 x i32> %tmp3
 }
 
@@ -96,7 +96,7 @@ define <8 x i16> @vld1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp2 = load i16* %A, align 8
   %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 5
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+; CHECK-NEXT:    vld1.16 {{{d[0-9]+\[[0-9]\]}}}, [r0:16]
   ret <8 x i16> %tmp3
 }
 
@@ -105,7 +105,7 @@ define <4 x i32> @vld1laneQi32(i32* %A, <4 x i32>* %B) nounwind {
   %tmp2 = load i32* %A, align 8
   %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 3
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+; CHECK-NEXT:    vld1.32 {{{d[0-9]+\[[0-9]\]}}}, [r0:32]
   ret <4 x i32> %tmp3
 }
 
@@ -116,7 +116,7 @@ define <8 x i8> @vld2lanei8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp4 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 1
   %tmp5 = add <8 x i8> %tmp3, %tmp4
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld2.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :16]
+; CHECK-NEXT:    vld2.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:16]
   ret <8 x i8> %tmp5
 }
 
@@ -128,7 +128,7 @@ define <4 x i16> @vld2lanei16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp4 = extractvalue %struct.__neon_int16x4x2_t %tmp2, 1
   %tmp5 = add <4 x i16> %tmp3, %tmp4
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+; CHECK-NEXT:    vld2.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:32]
   ret <4 x i16> %tmp5
 }
 
@@ -165,7 +165,7 @@ define <4 x i32> @vld2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
   %tmp4 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 1
   %tmp5 = add <4 x i32> %tmp3, %tmp4
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+; CHECK-NEXT:    vld2.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:64]
   ret <4 x i32> %tmp5
 }
 
@@ -249,7 +249,7 @@ define <8 x i8> @vld4lanei8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp8 = add <8 x i8> %tmp5, %tmp6
   %tmp9 = add <8 x i8> %tmp7, %tmp8
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :32]
+; CHECK-NEXT:    vld4.8 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:32]
   ret <8 x i8> %tmp9
 }
 
@@ -281,7 +281,7 @@ define <2 x i32> @vld4lanei32(i32* %A, <2 x i32>* %B) nounwind {
   %tmp8 = add <2 x i32> %tmp5, %tmp6
   %tmp9 = add <2 x i32> %tmp7, %tmp8
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+; CHECK-NEXT:    vld4.32 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:64]
   ret <2 x i32> %tmp9
 }
 
@@ -297,7 +297,7 @@ define <8 x i16> @vld4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp8 = add <8 x i16> %tmp5, %tmp6
   %tmp9 = add <8 x i16> %tmp7, %tmp8
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0, :64]
+; CHECK-NEXT:    vld4.16 {{{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}, {{d[0-9]+\[[0-9]\]}}}, [r0:64]
   ret <8 x i16> %tmp9
 }
 
diff --git a/test/NaCl/ARM/neon-vst1-sandboxing.ll b/test/NaCl/ARM/neon-vst1-sandboxing.ll
index 4c472aa216..361b8668a9 100644
--- a/test/NaCl/ARM/neon-vst1-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst1-sandboxing.ll
@@ -5,7 +5,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
   call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst1.8 {{{d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vst1.8 {{{d[0-9]+}}}, [r0:64]
   ret void
 }
 
@@ -51,7 +51,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64]
   ret void
 }
 
@@ -62,7 +62,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128]
+; CHECK-NEXT:    vst1.16 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
   ret void
 }
 
diff --git a/test/NaCl/ARM/neon-vst2-sandboxing.ll b/test/NaCl/ARM/neon-vst2-sandboxing.ll
index f01064f877..155994abf8 100644
--- a/test/NaCl/ARM/neon-vst2-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst2-sandboxing.ll
@@ -5,7 +5,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
   call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.8 {{{d[0-9]+, d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vst2.8 {{{d[0-9]+, d[0-9]+}}}, [r0:64]
   ret void
 }
 
@@ -14,7 +14,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp1 = load <4 x i16>* %B
   call void @llvm.arm.neon.vst2.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 32)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.16 {{{d[0-9]+, d[0-9]+}}}, [r0, :128]
+; CHECK-NEXT:    vst2.16 {{{d[0-9]+, d[0-9]+}}}, [r0:128]
   ret void
 }
 
@@ -42,7 +42,7 @@ define void @vst2Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vst2.8 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0:64]
   ret void
 }
 
@@ -53,7 +53,7 @@ define void @vst2Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :128]
+; CHECK-NEXT:    vst2.16 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0:128]
   ret void
 }
 
@@ -64,7 +64,7 @@ define void @vst2Qi32(i32* %A, <4 x i32>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 64)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0, :256]
+; CHECK-NEXT:    vst2.32 {{{d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}}, [r0:256]
   ret void
 }
 
diff --git a/test/NaCl/ARM/neon-vst3-sandboxing.ll b/test/NaCl/ARM/neon-vst3-sandboxing.ll
index 856f728f16..1f6b641039 100644
--- a/test/NaCl/ARM/neon-vst3-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst3-sandboxing.ll
@@ -5,7 +5,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
   call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vst3.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64]
   ret void
 }
 
diff --git a/test/NaCl/ARM/neon-vst4-sandboxing.ll b/test/NaCl/ARM/neon-vst4-sandboxing.ll
index 550de7dd72..e672d6e09d 100644
--- a/test/NaCl/ARM/neon-vst4-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vst4-sandboxing.ll
@@ -5,7 +5,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
   call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :64]
+; CHECK-NEXT:    vst4.8 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:64]
   ret void
 }
 
@@ -14,7 +14,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp1 = load <4 x i16>* %B
   call void @llvm.arm.neon.vst4.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :128]
+; CHECK-NEXT:    vst4.16 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]
   ret void
 }
 
@@ -23,7 +23,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
   %tmp1 = load <2 x i32>* %B
   call void @llvm.arm.neon.vst4.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 32)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0, :256]
+; CHECK-NEXT:    vst4.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}, [r0:256]
   ret void
 }
 
diff --git a/test/NaCl/ARM/neon-vstlane-sandboxing.ll b/test/NaCl/ARM/neon-vstlane-sandboxing.ll
index 769a7c6712..d8a004b6af 100644
--- a/test/NaCl/ARM/neon-vstlane-sandboxing.ll
+++ b/test/NaCl/ARM/neon-vstlane-sandboxing.ll
@@ -15,7 +15,7 @@ define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp2 = extractelement <4 x i16> %tmp1, i32 2
   store i16 %tmp2, i16* %A, align 8
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst1.16 {d{{[0-9]+}}[2]}, [r0, :16]
+; CHECK-NEXT:    vst1.16 {d{{[0-9]+}}[2]}, [r0:16]
   ret void
 }
 
@@ -24,7 +24,7 @@ define void @vst1lanei32(i32* %A, <2 x i32>* %B) nounwind {
   %tmp2 = extractelement <2 x i32> %tmp1, i32 1
   store i32 %tmp2, i32* %A, align 8
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst1.32 {d{{[0-9]+}}[1]}, [r0, :32]
+; CHECK-NEXT:    vst1.32 {d{{[0-9]+}}[1]}, [r0:32]
   ret void
 }
 
@@ -46,7 +46,7 @@ define void @vst1laneQi16(i16* %A, <8 x i16>* %B) nounwind {
   %tmp2 = extractelement <8 x i16> %tmp1, i32 5
   store i16 %tmp2, i16* %A, align 8
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst1.16 {d{{[0-9]+}}[1]}, [r0, :16]
+; CHECK-NEXT:    vst1.16 {d{{[0-9]+}}[1]}, [r0:16]
   ret void
 }
 
@@ -54,7 +54,7 @@ define void @vst2lanei8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
   call void @llvm.arm.neon.vst2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :16]
+; CHECK-NEXT:    vst2.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:16]
   ret void
 }
 
@@ -63,7 +63,7 @@ define void @vst2lanei16(i16* %A, <4 x i16>* %B) nounwind {
   %tmp1 = load <4 x i16>* %B
   call void @llvm.arm.neon.vst2lane.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :32]
+; CHECK-NEXT:    vst2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:32]
   ret void
 }
 
@@ -94,7 +94,7 @@ define void @vst2laneQi32(i32* %A, <4 x i32>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst2lane.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0, :64]
+; CHECK-NEXT:    vst2.32 {d{{[0-9]+}}[0], d{{[0-9]+}}[0]}, [r0:64]
   ret void
 }
 
@@ -128,7 +128,7 @@ define void @vst4lanei8(i8* %A, <8 x i8>* %B) nounwind {
   %tmp1 = load <8 x i8>* %B
   call void @llvm.arm.neon.vst4lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst4.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :32]
+; CHECK-NEXT:    vst4.8 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:32]
   ret void
 }
 
@@ -146,7 +146,7 @@ define void @vst4lanei32(i32* %A, <2 x i32>* %B) nounwind {
   %tmp1 = load <2 x i32>* %B
   call void @llvm.arm.neon.vst4lane.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst4.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0, :128]
+; CHECK-NEXT:    vst4.32 {d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0:128]
   ret void
 }
 
@@ -157,7 +157,7 @@ define void @vst4laneQi16(i16* %A, <8 x i16>* %B) nounwind {
 ; CHECK-NEXT:    vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]
   call void @llvm.arm.neon.vst4lane.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
 ; CHECK:         bic r0, r0, #3221225472
-; CHECK-NEXT:    vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0, :64]
+; CHECK-NEXT:    vst4.16 {d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3], d{{[0-9]+}}[3]}, [r0:64]
   ret void
 }
author	Eli Bendersky <eliben@chromium.org>	2013-03-11 15:38:11 -0700
committer	Eli Bendersky <eliben@chromium.org>	2013-03-20 14:49:21 -0700
commit	d41567d2ffd3413600162653c08b2365bd5bcbbf (patch)
tree	aa1c212bcf816f4011315b80826acfb85ae7d9f3
parent	23c00401dad33ca247d2818e71540079bed63c5b (diff)