10 files changed, 767 insertions, 61 deletions
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
index 9acaf68c82..63af3957fb 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp
@@ -13,6 +13,8 @@
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCNaCl.h"
+#include "X86NaClDecls.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
@@ -28,12 +30,22 @@ using namespace llvm;
 // This option makes it possible to overwrite the x86 jmp mask immediate.
 // Setting it to -1 will effectively turn masking into a nop which will
 // help with linking this code with non-sandboxed libs (at least for x86-32).
-cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", cl::init(-32));
+cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask",
+                               cl::init(-kNaClX86InstructionBundleSize));
 
 cl::opt<bool> FlagUseZeroBasedSandbox("sfi-zero-based-sandbox",
                                       cl::desc("Use a zero-based sandbox model"
                                                " for the NaCl SFI."),
                                       cl::init(false));
+// This flag can be set to false to test the performance impact of
+// hiding the sandbox base.
+cl::opt<bool> FlagHideSandboxBase("sfi-hide-sandbox-base",
+                                  cl::desc("Prevent 64-bit NaCl sandbox"
+                                           " pointers from being written to"
+                                           " the stack. [default=true]"),
+                                  cl::init(true));
+
+const int kNaClX86InstructionBundleSize = 32;
 
 static unsigned PrefixSaved = 0;
 static bool PrefixPass = false;
@@ -44,25 +56,134 @@ unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High=false);
 unsigned DemoteRegTo32_(unsigned RegIn);
 } // namespace
 
+static MCSymbol *CreateTempLabel(MCContext &Context, const char *Prefix) {
+  // Symbol name is <private-global prefix><Prefix><unique symbol id>.
+  SmallString<128> Name;
+  raw_svector_ostream(Name) << Context.getAsmInfo().getPrivateGlobalPrefix()
+                            << Prefix << Context.getUniqueSymbolID();
+  return Context.GetOrCreateSymbol(Name);
+}
+
 static void EmitDirectCall(const MCOperand &Op, bool Is64Bit,
                            MCStreamer &Out) {
-  Out.EmitBundleLock(true);
+  const bool HideSandboxBase = (FlagHideSandboxBase &&
+                                Is64Bit && !FlagUseZeroBasedSandbox);
+  if (HideSandboxBase) {
+    // For NaCl64, the sequence
+    //   call target
+    //   return_addr:
+    // is changed to
+    //   push return_addr
+    //   jmp target
+    //   .align 32
+    //   return_addr:
+    // This avoids exposing the sandbox base address via the return
+    // address on the stack.
+
+    MCContext &Context = Out.getContext();
+
+    // Generate a label for the return address.
+    MCSymbol *RetTarget = CreateTempLabel(Context, "DirectCallRetAddr");
+    const MCExpr *RetTargetExpr = MCSymbolRefExpr::Create(RetTarget, Context);
+
+    // push return_addr
+    MCInst PUSHInst;
+    PUSHInst.setOpcode(X86::PUSH64i32);
+    PUSHInst.addOperand(MCOperand::CreateExpr(RetTargetExpr));
+    Out.EmitInstruction(PUSHInst);
+
+    // jmp target
+    MCInst JMPInst;
+    JMPInst.setOpcode(X86::JMP_4);
+    JMPInst.addOperand(Op);
+    Out.EmitInstruction(JMPInst);
 
-  MCInst CALLInst;
-  CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
-  CALLInst.addOperand(Op);
-  Out.EmitInstruction(CALLInst);
-  Out.EmitBundleUnlock();
+    Out.EmitCodeAlignment(kNaClX86InstructionBundleSize);
+    Out.EmitLabel(RetTarget);
+  } else {
+    Out.EmitBundleLock(true);
+
+    MCInst CALLInst;
+    CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32);
+    CALLInst.addOperand(Op);
+    Out.EmitInstruction(CALLInst);
+    Out.EmitBundleUnlock();
+  }
 }
 
 static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall,
                                MCStreamer &Out) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
+  const bool HideSandboxBase = (FlagHideSandboxBase &&
+                                Is64Bit && !FlagUseZeroBasedSandbox);
   const int JmpMask = FlagSfiX86JmpMask;
-  const unsigned Reg32 = Op.getReg();
+  unsigned Reg32 = Op.getReg();
+
+  // For NaCl64, the sequence
+  //   jmp *%rXX
+  // is changed to
+  //   mov %rXX,%r11d
+  //   and $0xffffffe0,%r11d
+  //   add %r15,%r11
+  //   jmpq *%r11
+  //
+  // And the sequence
+  //   call *%rXX
+  //   return_addr:
+  // is changed to
+  //   mov %rXX,%r11d
+  //   push return_addr
+  //   and $0xffffffe0,%r11d
+  //   add %r15,%r11
+  //   jmpq *%r11
+  //   .align 32
+  //   return_addr:
+  //
+  // This avoids exposing the sandbox base address via the return
+  // address on the stack.
+
+  // For NaCl64, force an assignment of the branch target into r11,
+  // and subsequently use r11 as the ultimate branch target, so that
+  // only r11 (which will never be written to memory) exposes the
+  // sandbox base address.  But avoid a redundant assignment if the
+  // original branch target is already r11 or r11d.
+  const unsigned SafeReg32 = X86::R11D;
+  const unsigned SafeReg64 = X86::R11;
+  if (HideSandboxBase) {
+    // In some cases, EmitIndirectBranch() is called with a 32-bit
+    // register Op (e.g. r11d), and in other cases a 64-bit register
+    // (e.g. r11), so we need to test both variants to avoid a
+    // redundant assignment.  TODO(stichnot): Make callers consistent
+    // on 32 vs 64 bit register.
+    if ((Reg32 != SafeReg32) && (Reg32 != SafeReg64)) {
+      MCInst MOVInst;
+      MOVInst.setOpcode(X86::MOV32rr);
+      MOVInst.addOperand(MCOperand::CreateReg(SafeReg32));
+      MOVInst.addOperand(MCOperand::CreateReg(Reg32));
+      Out.EmitInstruction(MOVInst);
+      Reg32 = SafeReg32;
+    }
+  }
   const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64);
 
-  Out.EmitBundleLock(IsCall);
+  // Explicitly push the (32-bit) return address for a NaCl64 call
+  // instruction.
+  MCSymbol *RetTarget = NULL;
+  if (IsCall && HideSandboxBase) {
+    MCContext &Context = Out.getContext();
+
+    // Generate a label for the return address.
+    RetTarget = CreateTempLabel(Context, "IndirectCallRetAddr");
+    const MCExpr *RetTargetExpr = MCSymbolRefExpr::Create(RetTarget, Context);
+
+    // push return_addr
+    MCInst PUSHInst;
+    PUSHInst.setOpcode(X86::PUSH64i32);
+    PUSHInst.addOperand(MCOperand::CreateExpr(RetTargetExpr));
+    Out.EmitInstruction(PUSHInst);
+  }
+
+  const bool WillEmitCallInst = IsCall && !HideSandboxBase;
+  Out.EmitBundleLock(WillEmitCallInst);
 
   MCInst ANDInst;
   ANDInst.setOpcode(X86::AND32ri8);
@@ -71,7 +192,7 @@ static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall,
   ANDInst.addOperand(MCOperand::CreateImm(JmpMask));
   Out.EmitInstruction(ANDInst);
 
-  if (Is64Bit && !UseZeroBasedSandbox) {
+  if (Is64Bit && !FlagUseZeroBasedSandbox) {
     MCInst InstADD;
     InstADD.setOpcode(X86::ADD64rr);
     InstADD.addOperand(MCOperand::CreateReg(Reg64));
@@ -80,24 +201,40 @@ static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall,
     Out.EmitInstruction(InstADD);
   }
 
-  if (IsCall) {
+  if (WillEmitCallInst) {
+    // callq *%rXX
     MCInst CALLInst;
     CALLInst.setOpcode(Is64Bit ? X86::CALL64r : X86::CALL32r);
     CALLInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32));
     Out.EmitInstruction(CALLInst);
   } else {
+    // jmpq *%rXX   -or-   jmpq *%r11
     MCInst JMPInst;
     JMPInst.setOpcode(Is64Bit ? X86::JMP64r : X86::JMP32r);
     JMPInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32));
     Out.EmitInstruction(JMPInst);
   }
   Out.EmitBundleUnlock();
+  if (RetTarget) {
+    Out.EmitCodeAlignment(kNaClX86InstructionBundleSize);
+    Out.EmitLabel(RetTarget);
+  }
 }
 
 static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) {
+  // For NaCl64 returns, follow the convention of using r11 to hold
+  // the target of an indirect jump to avoid potentially leaking the
+  // sandbox base address.
+  const bool HideSandboxBase = (FlagHideSandboxBase &&
+                                Is64Bit && !FlagUseZeroBasedSandbox);
+  // For NaCl64 sandbox hiding, use r11 to hold the branch target.
+  // Otherwise, use rcx/ecx for fewer instruction bytes (no REX
+  // prefix).
+  const unsigned RegTarget = HideSandboxBase ? X86::R11 :
+    (Is64Bit ? X86::RCX : X86::ECX);
   MCInst POPInst;
   POPInst.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r);
-  POPInst.addOperand(MCOperand::CreateReg(Is64Bit ? X86::RCX : X86::ECX));
+  POPInst.addOperand(MCOperand::CreateReg(RegTarget));
   Out.EmitInstruction(POPInst);
 
   if (AmtOp) {
@@ -113,7 +250,7 @@ static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) {
 
   MCInst JMPInst;
   JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r);
-  JMPInst.addOperand(MCOperand::CreateReg(X86::ECX));
+  JMPInst.addOperand(MCOperand::CreateReg(RegTarget));
   Out.EmitInstruction(JMPInst);
 }
 
@@ -121,8 +258,7 @@ static void EmitTrap(bool Is64Bit, MCStreamer &Out) {
   // Rewrite to:
   //    X86-32:  mov $0, 0
   //    X86-64:  mov $0, (%r15)
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
-  unsigned BaseReg = Is64Bit && !UseZeroBasedSandbox ? X86::R15 : 0;
+  unsigned BaseReg = Is64Bit && !FlagUseZeroBasedSandbox ? X86::R15 : 0;
 
   MCInst Tmp;
   Tmp.setOpcode(X86::MOV32mi);
@@ -140,8 +276,7 @@ static void EmitTrap(bool Is64Bit, MCStreamer &Out) {
 static void EmitRegFix(unsigned Reg64, MCStreamer &Out) {
   // lea (%rsp, %r15, 1), %rsp
   // We do not need to add the R15 base for the zero-based sandbox model
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
-  if (!UseZeroBasedSandbox) {
+  if (!FlagUseZeroBasedSandbox) {
     MCInst Tmp;
     Tmp.setOpcode(X86::LEA64r);
     Tmp.addOperand(MCOperand::CreateReg(Reg64));    // DestReg
@@ -215,9 +350,8 @@ static void EmitRegTruncate(unsigned Reg64, MCStreamer &Out) {
 
 static void HandleMemoryRefTruncation(MCInst *Inst, unsigned IndexOpPosition,
                                       MCStreamer &Out) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
   unsigned IndexReg = Inst->getOperand(IndexOpPosition).getReg();
-  if (UseZeroBasedSandbox) {
+  if (FlagUseZeroBasedSandbox) {
     // With the zero-based sandbox, we use a 32-bit register on the index
     Inst->getOperand(IndexOpPosition).setReg(DemoteRegTo32_(IndexReg));
   } else {
@@ -352,7 +486,6 @@ namespace llvm {
 //   these instead of combined instructions. At this time, having only
 //   one explicit prefix is supported.
 bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
   // If we are emitting to .s, just emit all pseudo-instructions directly.
   if (Out.hasRawTextSupport()) {
     return false;
@@ -473,7 +606,7 @@ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) {
     unsigned PrefixLocal = PrefixSaved;
     PrefixSaved = 0;
 
-    if (PrefixLocal || !UseZeroBasedSandbox)
+    if (PrefixLocal || !FlagUseZeroBasedSandbox)
       Out.EmitBundleLock(false);
 
     HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out);
@@ -483,7 +616,7 @@ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) {
       EmitPrefix(PrefixLocal, Out);
     Out.EmitInstruction(SandboxedInst);
 
-    if (PrefixLocal || !UseZeroBasedSandbox)
+    if (PrefixLocal || !FlagUseZeroBasedSandbox)
       Out.EmitBundleUnlock();
     return true;
   }
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index b024817891..89485cb06c 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -15,6 +15,7 @@
 #include "X86InstrBuilder.h"
 #include "X86InstrInfo.h"
 #include "X86MachineFunctionInfo.h"
+#include "X86NaClDecls.h" // @LOCALMOD
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
@@ -756,8 +757,52 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
     MFI->setOffsetAdjustment(-NumBytes);
 
     // Save EBP/RBP into the appropriate stack slot.
+    // @LOCALMOD-BEGIN
+    unsigned RegToPush = FramePtr;
+    const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
+    const bool HideSandboxBase = (FlagHideSandboxBase &&
+                                  Subtarget->isTargetNaCl64() &&
+                                  !FlagUseZeroBasedSandbox);
+    if (HideSandboxBase) {
+      // Hide the sandbox base address by masking off the upper 32
+      // bits of the pushed/saved RBP on the stack, using:
+      //   mov %ebp, %r10d
+      //   push %r10
+      // instead of:
+      //   push %rbp
+      // Additionally, we can use rax instead of r10 when it is not a
+      // varargs function and therefore rax is available, saving one
+      // byte of REX prefix per instruction.
+      // Note that the epilog already adds R15 when restoring RBP.
+
+      // mov %ebp, %r10d  (or %eax when the function is not varargs)
+      unsigned RegToPushLower;
+      if (Fn->isVarArg()) {
+        // Note: Clobbering r10 in the prolog is incompatible with the
+        // gcc "nest" attribute, which passes its argument in r10.  Example:
+        // target triple = "x86_64-pc-linux-gnu"
+        // define i64 @func(i64 nest %arg) {
+        //   ret i64 %arg
+        // }
+        //
+        // $ clang -m64 llvm_nest_attr.ll -S -o -
+        // ...
+        // func:
+        //     movq    %r10, %rax
+        //     ret
+        RegToPush = X86::R10;
+        RegToPushLower = X86::R10D;
+      } else {
+        RegToPush = X86::RAX;
+        RegToPushLower = X86::EAX;
+      }
+      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rr), RegToPushLower)
+        .addReg(FramePtr)
+        .setMIFlag(MachineInstr::FrameSetup);
+    }
+    // @LOCALMOD-END
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
-      .addReg(FramePtr, RegState::Kill)
+      .addReg(RegToPush, RegState::Kill) // @LOCALMOD
       .setMIFlag(MachineInstr::FrameSetup);
 
     if (needsFrameMoves) {
diff --git a/lib/Target/X86/X86NaClDecls.h b/lib/Target/X86/X86NaClDecls.h
new file mode 100644
index 0000000000..4050187c68
--- /dev/null
+++ b/lib/Target/X86/X86NaClDecls.h
@@ -0,0 +1,28 @@
+//===-- X86NaClDecls.h - Common X86 NaCl declarations -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides various NaCl-related declarations for the X86-32
+// and X86-64 architectures.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86NACLDECLS_H
+#define X86NACLDECLS_H
+
+#include "llvm/Support/CommandLine.h"
+
+// NaCl x86 instruction bundle size; defined in X86MCNaCl.cpp.
+extern const int kNaClX86InstructionBundleSize;
+
+// NaCl SFI flags, defined in X86MCNaCl.cpp and X86NaClRewritePass.cpp.
+extern llvm::cl::opt<bool> FlagRestrictR15;
+extern llvm::cl::opt<bool> FlagUseZeroBasedSandbox;
+extern llvm::cl::opt<bool> FlagHideSandboxBase;
+
+#endif    // X86NACLDECLS_H
diff --git a/lib/Target/X86/X86NaClRewritePass.cpp b/lib/Target/X86/X86NaClRewritePass.cpp
index 846c72f452..8a131029e2 100644
--- a/lib/Target/X86/X86NaClRewritePass.cpp
+++ b/lib/Target/X86/X86NaClRewritePass.cpp
@@ -19,6 +19,7 @@
 
 #include "X86.h"
 #include "X86InstrInfo.h"
+#include "X86NaClDecls.h"
 #include "X86Subtarget.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -32,7 +33,6 @@
 
 using namespace llvm;
 
-extern cl::opt<bool> FlagUseZeroBasedSandbox;
 cl::opt<bool> FlagRestrictR15("sfi-restrict-r15",
                               cl::desc("Restrict use of %r15.  This flag can"
                                        " be turned off for the zero-based"
@@ -142,9 +142,8 @@ static bool IsDirectBranch(const MachineInstr &MI) {
 }
 
 static bool IsRegAbsolute(unsigned Reg) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
   const bool RestrictR15 = FlagRestrictR15;
-  assert(UseZeroBasedSandbox || RestrictR15);
+  assert(FlagUseZeroBasedSandbox || RestrictR15);
   return (Reg == X86::RSP || Reg == X86::RBP ||
           (Reg == X86::R15 && RestrictR15));
 }
@@ -219,7 +218,6 @@ X86NaClRewritePass::TraceLog(const char *func,
 
 bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
   TraceLog("ApplyStackSFI", MBB, MBBI);
   assert(Is64Bit);
   MachineInstr &MI = *MBBI;
@@ -249,7 +247,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
   if (NewOpc) {
     BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
       .addImm(MI.getOperand(2).getImm())
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
     MI.eraseFromParent();
     return true;
   }
@@ -288,7 +286,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
     const MachineOperand &Offset = MI.getOperand(4);
     BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_SPADJi32))
       .addImm(Offset.getImm())
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
     MI.eraseFromParent();
     return true;
   }
@@ -296,7 +294,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
   if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) {
     BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPr))
       .addReg(DemoteRegTo32(MI.getOperand(1).getReg()))
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
     MI.eraseFromParent();
     return true;
   }
@@ -308,7 +306,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
       .addOperand(MI.getOperand(3)) // Index
       .addOperand(MI.getOperand(4)) // Offset
       .addOperand(MI.getOperand(5)) // Segment
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
     MI.eraseFromParent();
     return true;
   }
@@ -319,7 +317,6 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB,
 
 bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
   TraceLog("ApplyFrameSFI", MBB, MBBI);
   assert(Is64Bit);
   MachineInstr &MI = *MBBI;
@@ -343,7 +340,7 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
     // To:      naclrestbp %eX, %rZP
     BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPr))
       .addReg(DemoteRegTo32(SrcReg))
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); // rZP
     MI.eraseFromParent();
     return true;
   }
@@ -353,7 +350,7 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
     assert(MI.getOperand(0).getReg() == X86::RBP);
 
     // Zero-based sandbox model uses address clipping
-    if (UseZeroBasedSandbox)
+    if (FlagUseZeroBasedSandbox)
       return false;
 
     // Rewrite: mov %rbp, (...)
@@ -364,7 +361,7 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
       .addOperand(MI.getOperand(3))  // Index
       .addOperand(MI.getOperand(4))  // Offset
       .addOperand(MI.getOperand(5))  // Segment
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); // rZP
     MI.eraseFromParent();
     return true;
   }
@@ -389,11 +386,11 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
       .addReg(0)  // Index
       .addImm(0)  // Offset
       .addReg(0)  // Segment
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); // rZP
 
     BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi8))
       .addImm(8)
-      .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
 
     MI.eraseFromParent();
     return true;
@@ -405,7 +402,8 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB,
 
 bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
+  const bool HideSandboxBase = (FlagHideSandboxBase &&
+                                Is64Bit && !FlagUseZeroBasedSandbox);
   TraceLog("ApplyControlSFI", MBB, MBBI);
   MachineInstr &MI = *MBBI;
 
@@ -436,7 +434,7 @@ bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB,
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
        .addOperand(MI.getOperand(0));
     if (Is64Bit) {
-      NewMI.addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      NewMI.addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
     }
     MI.eraseFromParent();
     return true;
@@ -451,25 +449,32 @@ bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB,
       Opc == X86::RETI) {
     // To maintain compatibility with nacl-as, for now we don't emit naclret.
     // MI.setDesc(TII->get(Is64Bit ? X86::NACL_RET64 : X86::NACL_RET32));
+    //
+    // For NaCl64 returns, follow the convention of using r11 to hold
+    // the target of an indirect jump to avoid potentially leaking the
+    // sandbox base address.
+    unsigned RegTarget;
     if (Is64Bit) {
-      BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), X86::RCX);
+      RegTarget = (HideSandboxBase ? X86::R11 : X86::RCX);
+      BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), RegTarget);
       if (Opc == X86::RETI) {
         BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi32))
           .addOperand(MI.getOperand(0))
-          .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+          .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
       }
       BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP64r))
-        .addReg(X86::ECX)
-        .addReg(UseZeroBasedSandbox ? 0 : X86::R15);
+        .addReg(RegTarget)
+        .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
     } else {
-      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX);
+      RegTarget = X86::ECX;
+      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), RegTarget);
       if (Opc == X86::RETI) {
         BuildMI(MBB, MBBI, DL, TII->get(X86::ADD32ri), X86::ESP)
           .addReg(X86::ESP)
           .addOperand(MI.getOperand(0));
       }
       BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r))
-        .addReg(X86::ECX);
+        .addReg(RegTarget);
     }
     MI.eraseFromParent();
     return true;
@@ -480,7 +485,7 @@ bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB,
     // To maintain compatibility with nacl-as, for now we don't emit nacltrap.
     // MI.setDesc(TII->get(Is64Bit ? X86::NACL_TRAP64 : X86::NACL_TRAP32));
     BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32mi))
-      .addReg(Is64Bit && !UseZeroBasedSandbox ? X86::R15 : 0) // Base
+      .addReg(Is64Bit && !FlagUseZeroBasedSandbox ? X86::R15 : 0) // Base
       .addImm(1) // Scale
       .addReg(0) // Index
       .addImm(0) // Offset
@@ -502,7 +507,6 @@ bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB,
   TraceLog("ApplyMemorySFI", MBB, MBBI);
   assert(Is64Bit);
   MachineInstr &MI = *MBBI;
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
 
   if (!IsLoad(MI) && !IsStore(MI))
     return false;
@@ -545,9 +549,9 @@ bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB,
   } else {
     if (!BaseReg.getReg()) {
       // No base, fill in relative.
-      BaseReg.setReg(UseZeroBasedSandbox ? 0 : X86::R15);
+      BaseReg.setReg(FlagUseZeroBasedSandbox ? 0 : X86::R15);
       AddrReg = IndexReg.getReg();
-    } else if (!UseZeroBasedSandbox) {
+    } else if (!FlagUseZeroBasedSandbox) {
       // Switch base and index registers if index register is undefined.
       // That is do conversions like "mov d(%r,0,0) -> mov d(%r15, %r, 1)".
       assert (!IndexReg.getReg()
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index bab08b69df..67bac130b5 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -17,6 +17,7 @@
 #include "X86.h"
 #include "X86InstrBuilder.h"
 #include "X86MachineFunctionInfo.h"
+#include "X86NaClDecls.h" // @LOCALMOD
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
 #include "llvm/ADT/BitVector.h"
@@ -54,11 +55,6 @@ static cl::opt<bool>
 EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
           cl::desc("Enable use of a base pointer for complex stack frames"));
 
-// @LOCALMOD-BEGIN
-extern cl::opt<bool> FlagUseZeroBasedSandbox;
-extern cl::opt<bool> FlagRestrictR15;
-// @LOCALMOD-END
-
 X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                  const TargetInstrInfo &tii)
   : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
@@ -394,9 +390,8 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
   // @LOCALMOD-START
   const X86Subtarget& Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
-  const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox;
   const bool RestrictR15 = FlagRestrictR15;
-  assert(UseZeroBasedSandbox || RestrictR15);
+  assert(FlagUseZeroBasedSandbox || RestrictR15);
   if (Subtarget.isTargetNaCl64()) {
     if (RestrictR15) {
       Reserved.set(X86::R15);
@@ -408,6 +403,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
     Reserved.set(X86::EBP);
     Reserved.set(X86::BP);
     Reserved.set(X86::BPL);
+    const bool RestrictR11 = FlagHideSandboxBase && !FlagUseZeroBasedSandbox;
+    if (RestrictR11) {
+      // Restrict r11 so that it can be used for indirect jump
+      // sequences that don't leak the sandbox base address onto the
+      // stack.
+      Reserved.set(X86::R11);
+      Reserved.set(X86::R11D);
+      Reserved.set(X86::R11W);
+      Reserved.set(X86::R11B);
+    }
   }
   // @LOCALMOD-END
 
diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll
index acfa64582c..ae1998cd54 100644
--- a/test/CodeGen/X86/fast-isel-x86-64.ll
+++ b/test/CodeGen/X86/fast-isel-x86-64.ll
@@ -55,8 +55,8 @@ define i32 @test3_nacl64() nounwind {
 
 ; NACL64_PIC: test3_nacl64:
 ; NACL64_PIC: movl G@GOTPCREL(%rip), %eax
-; NACL64_PIC-NEXT: popq    %rcx
-; NACL64_PIC-NEXT: nacljmp %ecx, %r15
+; NACL64_PIC-NEXT: popq    %r11
+; NACL64_PIC-NEXT: nacljmp %r11, %r15
 }
 
 
@@ -316,8 +316,8 @@ define void @test23(i8* noalias sret %result) {
 ; NACL64: test23:
 ; NACL64: call
 ; NACL64: movl  %edi, %eax
-; NACL64: popq %rcx
-; NACL64: nacljmp %ecx, %r15
+; NACL64: popq %r11
+; NACL64: nacljmp %r11, %r15
 }
 
 declare i8* @foo23()
diff --git a/test/NaCl/X86/pnacl-avoids-r11-x86-64.c b/test/NaCl/X86/pnacl-avoids-r11-x86-64.c
new file mode 100644
index 0000000000..65b921486f
--- /dev/null
+++ b/test/NaCl/X86/pnacl-avoids-r11-x86-64.c
@@ -0,0 +1,79 @@
+/*
+  LLVM assembly (.ll) file built using:
+  pnacl-clang -S -O2 -emit-llvm -o pnacl-avoids-r11-x86-64.ll \
+      pnacl-avoids-r11-x86-64.c
+  Then the comments below should be pasted into the .ll file,
+  replacing "RUNxxx" with "RUN".
+
+; The NACLON test verifies that %r11 and %r11d are not used except as
+; part of the return sequence.
+;
+; RUNxxx: pnacl-llc -O2 -mtriple=x86_64-none-nacl < %s | \
+; RUNxxx:     FileCheck %s --check-prefix=NACLON
+;
+; The NACLOFF test verifies that %r11 would normally be used if PNaCl
+; weren't reserving r11 for its own uses, to be sure NACLON is a
+; valid test.
+;
+; RUNxxx: pnacl-llc -O2 -mtriple=x86_64-linux < %s | \
+; RUNxxx:     FileCheck %s --check-prefix=NACLOFF
+;
+; NACLON: RegisterPressure:
+; NACLON-NOT: %r11
+; NACLON: popq %r11
+; NACLON: nacljmp %r11, %r15
+;
+; NACLOFF: RegisterPressure:
+; NACLOFF: %r11
+; NACLOFF: ret
+
+*/
+
+// Function RegisterPressure() tries to induce maximal integer
+// register pressure in a ~16 register machine, for both scratch and
+// preserved registers.  Repeated calls to Use() are designed to
+// use all the preserved registers.  The calculations on the local
+// variables between function calls are designed to use all the
+// scratch registers.
+
+void RegisterPressure(void)
+{
+  extern void Use(int, int, int, int, int, int, int, int,
+                  int, int, int, int, int, int, int, int);
+  extern int GetValue(void);
+  extern volatile int v1a, v1b, v2a, v2b, v3a, v3b, v4a, v4b;
+
+  int i00 = GetValue();
+  int i01 = GetValue();
+  int i02 = GetValue();
+  int i03 = GetValue();
+  int i04 = GetValue();
+  int i05 = GetValue();
+  int i06 = GetValue();
+  int i07 = GetValue();
+  int i08 = GetValue();
+  int i09 = GetValue();
+  int i10 = GetValue();
+  int i11 = GetValue();
+  int i12 = GetValue();
+  int i13 = GetValue();
+  int i14 = GetValue();
+  int i15 = GetValue();
+
+  Use(i00, i01, i02, i03, i04, i05, i06, i07,
+      i08, i09, i10, i11, i12, i13, i14, i15);
+  Use(i00, i01, i02, i03, i04, i05, i06, i07,
+      i08, i09, i10, i11, i12, i13, i14, i15);
+  v1a = i00 + i01 + i02 + i03 + i04 + i05 + i06 + i07;
+  v1b = i08 + i09 + i10 + i11 + i12 + i13 + i14 + i15;
+  v2a = i00 + i01 + i02 + i03 + i08 + i09 + i10 + i11;
+  v2b = i04 + i05 + i06 + i07 + i12 + i13 + i14 + i15;
+  v3a = i00 + i01 + i04 + i05 + i08 + i09 + i12 + i13;
+  v3b = i02 + i03 + i06 + i07 + i10 + i11 + i14 + i15;
+  v4a = i00 + i02 + i04 + i06 + i08 + i10 + i12 + i14;
+  v4b = i01 + i03 + i05 + i07 + i09 + i11 + i13 + i15;
+  Use(i00, i01, i02, i03, i04, i05, i06, i07,
+      i08, i09, i10, i11, i12, i13, i14, i15);
+  Use(i00, i01, i02, i03, i04, i05, i06, i07,
+      i08, i09, i10, i11, i12, i13, i14, i15);
+}
diff --git a/test/NaCl/X86/pnacl-avoids-r11-x86-64.ll b/test/NaCl/X86/pnacl-avoids-r11-x86-64.ll
new file mode 100644
index 0000000000..04cb4bfd3c
--- /dev/null
+++ b/test/NaCl/X86/pnacl-avoids-r11-x86-64.ll
@@ -0,0 +1,131 @@
+; The NACLON test verifies that %r11 and %r11d are not used except as
+; part of the return sequence.
+;
+; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl < %s | \
+; RUN:     FileCheck %s --check-prefix=NACLON
+;
+; The NACLOFF test verifies that %r11 would normally be used if PNaCl
+; weren't reserving r11 for its own uses, to be sure NACLON is a
+; valid test.
+;
+; RUN: pnacl-llc -O2 -mtriple=x86_64-linux < %s | \
+; RUN:     FileCheck %s --check-prefix=NACLOFF
+;
+; NACLON: RegisterPressure:
+; NACLON-NOT: %r11
+; NACLON: popq %r11
+; NACLON: nacljmp %r11, %r15
+;
+; NACLOFF: RegisterPressure:
+; NACLOFF: %r11
+; NACLOFF: ret
+; ModuleID = 'pnacl-avoids-r11-x86-64.c'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
+
+@v1a = external global i32
+@v1b = external global i32
+@v2a = external global i32
+@v2b = external global i32
+@v3a = external global i32
+@v3b = external global i32
+@v4a = external global i32
+@v4b = external global i32
+
+; Function Attrs: nounwind
+define void @RegisterPressure() #0 { ; sixteen i32 values are kept live across four @Use calls
+entry:
+  %call = tail call i32 @GetValue() #2 ; %call, %call1 .. %call15: the sixteen values
+  %call1 = tail call i32 @GetValue() #2
+  %call2 = tail call i32 @GetValue() #2
+  %call3 = tail call i32 @GetValue() #2
+  %call4 = tail call i32 @GetValue() #2
+  %call5 = tail call i32 @GetValue() #2
+  %call6 = tail call i32 @GetValue() #2
+  %call7 = tail call i32 @GetValue() #2
+  %call8 = tail call i32 @GetValue() #2
+  %call9 = tail call i32 @GetValue() #2
+  %call10 = tail call i32 @GetValue() #2
+  %call11 = tail call i32 @GetValue() #2
+  %call12 = tail call i32 @GetValue() #2
+  %call13 = tail call i32 @GetValue() #2
+  %call14 = tail call i32 @GetValue() #2
+  %call15 = tail call i32 @GetValue() #2
+  tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2
+  tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2
+  %add = add nsw i32 %call1, %call
+  %add16 = add nsw i32 %add, %call2
+  %add17 = add nsw i32 %add16, %call3
+  %add18 = add nsw i32 %add17, %call4
+  %add19 = add nsw i32 %add18, %call5
+  %add20 = add nsw i32 %add19, %call6
+  %add21 = add nsw i32 %add20, %call7
+  store volatile i32 %add21, i32* @v1a, align 4, !tbaa !0 ; sum of %call .. %call7
+  %add22 = add nsw i32 %call9, %call8
+  %add23 = add nsw i32 %add22, %call10
+  %add24 = add nsw i32 %add23, %call11
+  %add25 = add nsw i32 %add24, %call12
+  %add26 = add nsw i32 %add25, %call13
+  %add27 = add nsw i32 %add26, %call14
+  %add28 = add nsw i32 %add27, %call15
+  store volatile i32 %add28, i32* @v1b, align 4, !tbaa !0 ; sum of %call8 .. %call15
+  %add32 = add nsw i32 %call8, %add17 ; reuses %add17 (%call + %call1 + %call2 + %call3)
+  %add33 = add nsw i32 %add32, %call9
+  %add34 = add nsw i32 %add33, %call10
+  %add35 = add nsw i32 %add34, %call11
+  store volatile i32 %add35, i32* @v2a, align 4, !tbaa !0
+  %add36 = add nsw i32 %call5, %call4
+  %add37 = add nsw i32 %add36, %call6
+  %add38 = add nsw i32 %add37, %call7
+  %add39 = add nsw i32 %add38, %call12
+  %add40 = add nsw i32 %add39, %call13
+  %add41 = add nsw i32 %add40, %call14
+  %add42 = add nsw i32 %add41, %call15
+  store volatile i32 %add42, i32* @v2b, align 4, !tbaa !0
+  %add44 = add nsw i32 %call4, %add ; reuses %add (%call1 + %call)
+  %add45 = add nsw i32 %add44, %call5
+  %add46 = add nsw i32 %add45, %call8
+  %add47 = add nsw i32 %add46, %call9
+  %add48 = add nsw i32 %add47, %call12
+  %add49 = add nsw i32 %add48, %call13
+  store volatile i32 %add49, i32* @v3a, align 4, !tbaa !0
+  %add50 = add nsw i32 %call3, %call2
+  %add51 = add nsw i32 %add50, %call6
+  %add52 = add nsw i32 %add51, %call7
+  %add53 = add nsw i32 %add52, %call10
+  %add54 = add nsw i32 %add53, %call11
+  %add55 = add nsw i32 %add54, %call14
+  %add56 = add nsw i32 %add55, %call15
+  store volatile i32 %add56, i32* @v3b, align 4, !tbaa !0
+  %add57 = add nsw i32 %call2, %call
+  %add58 = add nsw i32 %add57, %call4
+  %add59 = add nsw i32 %add58, %call6
+  %add60 = add nsw i32 %add59, %call8
+  %add61 = add nsw i32 %add60, %call10
+  %add62 = add nsw i32 %add61, %call12
+  %add63 = add nsw i32 %add62, %call14
+  store volatile i32 %add63, i32* @v4a, align 4, !tbaa !0 ; sum of the even-numbered values
+  %add64 = add nsw i32 %call3, %call1
+  %add65 = add nsw i32 %add64, %call5
+  %add66 = add nsw i32 %add65, %call7
+  %add67 = add nsw i32 %add66, %call9
+  %add68 = add nsw i32 %add67, %call11
+  %add69 = add nsw i32 %add68, %call13
+  %add70 = add nsw i32 %add69, %call15
+  store volatile i32 %add70, i32* @v4b, align 4, !tbaa !0 ; sum of the odd-numbered values
+  tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2
+  tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2
+  ret void
+}
+
+declare i32 @GetValue() #1
+
+declare void @Use(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/NaCl/X86/pnacl-hides-sandbox-x86-64.c b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.c
new file mode 100644
index 0000000000..b1fc9ce83d
--- /dev/null
+++ b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.c
@@ -0,0 +1,120 @@
+/*
+  LLVM assembly (.ll) file generated using:
+  pnacl-clang -S -O0 -emit-llvm -o pnacl-hides-sandbox-x86-64.ll \
+      pnacl-hides-sandbox-x86-64.c
+  Then the comments below should be pasted into the .ll file.
+
+; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \
+; RUN:     llvm-objdump -d -r - | FileCheck %s
+; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \
+; RUN:     llvm-objdump -d -r - | FileCheck %s --check-prefix=NOCALLRET
+;
+; CHECK: TestDirectCall:
+; Push the immediate return address
+; CHECK:      pushq $0
+; CHECK-NEXT: .text
+; Immediate jump to the target
+; CHECK:      jmpq 0
+; CHECK-NEXT: DirectCallTarget
+; Return label
+; CHECK:      DirectCallRetAddr
+;
+; CHECK: TestIndirectCall:
+; Push the immediate return address
+; CHECK:      pushq $0
+; CHECK-NEXT: .text
+; Fixed sequence for indirect jump
+; CHECK:      andl $-32, %r11d
+; CHECK-NEXT: addq %r15, %r11
+; CHECK-NEXT: jmpq *%r11
+; Return label
+; CHECK:      IndirectCallRetAddr
+;
+; Verify that the old frame pointer isn't leaked when saved
+; CHECK: TestMaskedFramePointer:
+; CHECK: movl    %ebp, %eax
+; CHECK: pushq   %rax
+; CHECK: movq    %rsp, %rbp
+;
+; Verify use of r10 instead of rax in the presence of varargs,
+; when saving the old rbp.
+; CHECK: TestMaskedFramePointerVarargs:
+; CHECK: movl    %ebp, %r10d
+; CHECK: pushq   %r10
+; CHECK: movq    %rsp, %rbp
+;
+; Test the indirect jump sequence derived from a "switch" statement.
+; CHECK: TestIndirectJump:
+; CHECK:      andl $-32, %r11d
+; CHECK-NEXT: addq %r15, %r11
+; CHECK-NEXT: jmpq *%r11
+; At least 4 "jmp"s due to 5 switch cases
+; CHECK:      jmp
+; CHECK:      jmp
+; CHECK:      jmp
+; CHECK:      jmp
+; At least 1 direct call to puts()
+; CHECK:      pushq $0
+; CHECK-NEXT: .text
+; CHECK:      jmpq 0
+; CHECK-NEXT: puts
+;
+; Return sequence is just the indirect jump sequence
+; CHECK: TestReturn:
+; CHECK:      andl $-32, %r11d
+; CHECK-NEXT: addq %r15, %r11
+; CHECK-NEXT: jmpq *%r11
+;
+; Special test that no "call" or "ret" instructions are generated.
+; NOCALLRET-NOT: call
+; NOCALLRET-NOT: ret
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+
+void TestDirectCall(void) {
+  extern void DirectCallTarget(void);  // Declared only; not defined in this file.
+  DirectCallTarget();  // Direct call to a named function.
+}
+
+void TestIndirectCall(void) {
+  extern void (*IndirectCallTarget)(void);  // Function pointer, declared only.
+  IndirectCallTarget();  // Call through the pointer, i.e. an indirect call.
+}
+
+void TestMaskedFramePointer(int Arg) {
+  extern void Consume(void *);  // Declared only; not defined in this file.
+  // Calling alloca() is one way to force the rbp frame pointer.
+  void *Tmp = alloca(Arg);  // Allocation size is only known at run time.
+  Consume(Tmp);  // Presumably keeps the alloca() from being optimized out.
+}
+
+void TestMaskedFramePointerVarargs(int Arg, ...) {  // Variadic signature.
+  extern void Consume(void *);
+  void *Tmp = alloca(Arg);  // Same alloca()/Consume() body as TestMaskedFramePointer.
+  Consume(Tmp);
+}
+
+void TestIndirectJump(int Arg) {
+  switch (Arg) {  // Five cases (2, 3, 5, 7, 11); no default, so other values do nothing.
+  case 2:
+    puts("Prime 1");  // Each case makes one direct call to puts().
+    break;
+  case 3:
+    puts("Prime 2");
+    break;
+  case 5:
+    puts("Prime 3");
+    break;
+  case 7:
+    puts("Prime 4");
+    break;
+  case 11:
+    puts("Prime 5");
+    break;
+  }
+}
+
+void TestReturn(void) {  // Empty body: the function does nothing but return.
+}
diff --git a/test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll
new file mode 100644
index 0000000000..e64798f66b
--- /dev/null
+++ b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll
@@ -0,0 +1,161 @@
+; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \
+; RUN:     llvm-objdump -d -r - | FileCheck %s
+; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \
+; RUN:     llvm-objdump -d -r - | FileCheck %s --check-prefix=NOCALLRET
+
+; ModuleID = 'pnacl-hides-sandbox-x86-64.c'
+target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32"
+target triple = "le32-unknown-nacl"
+
+@IndirectCallTarget = external global void ()*
+@.str = private unnamed_addr constant [8 x i8] c"Prime 1\00", align 1
+@.str1 = private unnamed_addr constant [8 x i8] c"Prime 2\00", align 1
+@.str2 = private unnamed_addr constant [8 x i8] c"Prime 3\00", align 1
+@.str3 = private unnamed_addr constant [8 x i8] c"Prime 4\00", align 1
+@.str4 = private unnamed_addr constant [8 x i8] c"Prime 5\00", align 1
+
+; Function Attrs: nounwind
+define void @TestDirectCall() #0 {
+entry:
+  call void @DirectCallTarget() ; direct call; @DirectCallTarget is only declared in this module
+  ret void
+}
+; CHECK: TestDirectCall:
+; Push the immediate return address
+; CHECK:      pushq $0
+; CHECK-NEXT: .text
+; Immediate jump to the target
+; CHECK:      jmpq 0
+; CHECK-NEXT: DirectCallTarget
+
+declare void @DirectCallTarget() #1
+
+; Function Attrs: nounwind
+define void @TestIndirectCall() #0 {
+entry:
+  %0 = load void ()** @IndirectCallTarget, align 4 ; fetch the function pointer from the global
+  call void %0() ; call through the loaded pointer
+  ret void
+}
+; CHECK: TestIndirectCall:
+; Push the immediate return address
+; CHECK:      pushq $0
+; CHECK-NEXT: .text
+; Fixed sequence for indirect jump
+; CHECK:      andl $-32, %r11d
+; CHECK-NEXT: addq %r15, %r11
+; CHECK-NEXT: jmpq *%r11
+
+; Function Attrs: nounwind
+define void @TestMaskedFramePointer(i32 %Arg) #0 {
+entry:
+  %Arg.addr = alloca i32, align 4
+  %Tmp = alloca i8*, align 4
+  store i32 %Arg, i32* %Arg.addr, align 4
+  %0 = load i32* %Arg.addr, align 4
+  %1 = alloca i8, i32 %0 ; dynamically sized alloca: the element count is the reloaded %Arg
+  store i8* %1, i8** %Tmp, align 4
+  %2 = load i8** %Tmp, align 4
+  call void @Consume(i8* %2) ; pass the allocated buffer to the external @Consume
+  ret void
+}
+; Verify that the old frame pointer isn't leaked when saved
+; CHECK: TestMaskedFramePointer:
+; CHECK: movl    %ebp, %eax
+; CHECK: pushq   %rax
+; CHECK: movq    %rsp, %rbp
+
+declare void @Consume(i8*) #1
+
+; Function Attrs: nounwind
+define void @TestMaskedFramePointerVarargs(i32 %Arg, ...) #0 { ; variadic; body matches @TestMaskedFramePointer
+entry:
+  %Arg.addr = alloca i32, align 4
+  %Tmp = alloca i8*, align 4
+  store i32 %Arg, i32* %Arg.addr, align 4
+  %0 = load i32* %Arg.addr, align 4
+  %1 = alloca i8, i32 %0 ; dynamically sized alloca: the element count is the reloaded %Arg
+  store i8* %1, i8** %Tmp, align 4
+  %2 = load i8** %Tmp, align 4
+  call void @Consume(i8* %2)
+  ret void
+}
+; Verify use of r10 instead of rax in the presence of varargs,
+; when saving the old rbp.
+; CHECK: TestMaskedFramePointerVarargs:
+; CHECK: movl    %ebp, %r10d
+; CHECK: pushq   %r10
+; CHECK: movq    %rsp, %rbp
+
+; Function Attrs: nounwind
+define void @TestIndirectJump(i32 %Arg) #0 {
+entry:
+  %Arg.addr = alloca i32, align 4
+  store i32 %Arg, i32* %Arg.addr, align 4
+  %0 = load i32* %Arg.addr, align 4
+  switch i32 %0, label %sw.epilog [ ; five cases; any other value branches straight to %sw.epilog
+    i32 2, label %sw.bb
+    i32 3, label %sw.bb1
+    i32 5, label %sw.bb3
+    i32 7, label %sw.bb5
+    i32 11, label %sw.bb7
+  ]
+
+sw.bb:                                            ; preds = %entry
+  %call = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0)) ; each case block calls @puts once
+  br label %sw.epilog
+
+sw.bb1:                                           ; preds = %entry
+  %call2 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0))
+  br label %sw.epilog
+
+sw.bb3:                                           ; preds = %entry
+  %call4 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0))
+  br label %sw.epilog
+
+sw.bb5:                                           ; preds = %entry
+  %call6 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str3, i32 0, i32 0))
+  br label %sw.epilog
+
+sw.bb7:                                           ; preds = %entry
+  %call8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str4, i32 0, i32 0))
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb
+  ret void
+}
+; Test the indirect jump sequence derived from a "switch" statement.
+; CHECK: TestIndirectJump:
+; CHECK:      andl $-32, %r11d
+; CHECK-NEXT: addq %r15, %r11
+; CHECK-NEXT: jmpq *%r11
+; At least 4 "jmp"s due to 5 switch cases
+; CHECK:      jmp
+; CHECK:      jmp
+; CHECK:      jmp
+; CHECK:      jmp
+; At least 1 direct call to puts()
+; CHECK:      pushq $0
+; CHECK-NEXT: .text
+; CHECK:      jmpq 0
+; CHECK-NEXT: puts
+
+declare i32 @puts(i8*) #1
+
+; Function Attrs: nounwind
+define void @TestReturn() #0 {
+entry:
+  ret void ; the whole body is a single return
+}
+; Return sequence is just the indirect jump sequence
+; CHECK: TestReturn:
+; CHECK:      andl $-32, %r11d
+; CHECK-NEXT: addq %r15, %r11
+; CHECK-NEXT: jmpq *%r11
+
+attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+; Special test that no "call" or "ret" instructions are generated.
+; NOCALLRET-NOT: call
+; NOCALLRET-NOT: ret