diff options
-rw-r--r-- | lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp | 179 | ||||
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 47 | ||||
-rw-r--r-- | lib/Target/X86/X86NaClDecls.h | 28 | ||||
-rw-r--r-- | lib/Target/X86/X86NaClRewritePass.cpp | 56 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.cpp | 19 | ||||
-rw-r--r-- | test/CodeGen/X86/fast-isel-x86-64.ll | 8 | ||||
-rw-r--r-- | test/NaCl/X86/pnacl-avoids-r11-x86-64.c | 79 | ||||
-rw-r--r-- | test/NaCl/X86/pnacl-avoids-r11-x86-64.ll | 131 | ||||
-rw-r--r-- | test/NaCl/X86/pnacl-hides-sandbox-x86-64.c | 120 | ||||
-rw-r--r-- | test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll | 161 |
10 files changed, 767 insertions, 61 deletions
diff --git a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp index 9acaf68c82..63af3957fb 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCNaCl.cpp @@ -13,6 +13,8 @@ #include "MCTargetDesc/X86MCTargetDesc.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCNaCl.h" +#include "X86NaClDecls.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" @@ -28,12 +30,22 @@ using namespace llvm; // This option makes it possible to overwrite the x86 jmp mask immediate. // Setting it to -1 will effectively turn masking into a nop which will // help with linking this code with non-sandboxed libs (at least for x86-32). -cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", cl::init(-32)); +cl::opt<int> FlagSfiX86JmpMask("sfi-x86-jmp-mask", + cl::init(-kNaClX86InstructionBundleSize)); cl::opt<bool> FlagUseZeroBasedSandbox("sfi-zero-based-sandbox", cl::desc("Use a zero-based sandbox model" " for the NaCl SFI."), cl::init(false)); +// This flag can be set to false to test the performance impact of +// hiding the sandbox base. +cl::opt<bool> FlagHideSandboxBase("sfi-hide-sandbox-base", + cl::desc("Prevent 64-bit NaCl sandbox" + " pointers from being written to" + " the stack. [default=true]"), + cl::init(true)); + +const int kNaClX86InstructionBundleSize = 32; static unsigned PrefixSaved = 0; static bool PrefixPass = false; @@ -44,25 +56,134 @@ unsigned getX86SubSuperRegister_(unsigned Reg, EVT VT, bool High=false); unsigned DemoteRegTo32_(unsigned RegIn); } // namespace +static MCSymbol *CreateTempLabel(MCContext &Context, const char *Prefix) { + SmallString<128> NameSV; + raw_svector_ostream(NameSV) + << Context.getAsmInfo().getPrivateGlobalPrefix() // get internal label + << Prefix << Context.getUniqueSymbolID(); + return Context.GetOrCreateSymbol(NameSV); +} + static void EmitDirectCall(const MCOperand &Op, bool Is64Bit, MCStreamer &Out) { - Out.EmitBundleLock(true); + const bool HideSandboxBase = (FlagHideSandboxBase && + Is64Bit && !FlagUseZeroBasedSandbox); + if (HideSandboxBase) { + // For NaCl64, the sequence + // call target + // return_addr: + // is changed to + // push return_addr + // jmp target + // .align 32 + // return_addr: + // This avoids exposing the sandbox base address via the return + // address on the stack. + + MCContext &Context = Out.getContext(); + + // Generate a label for the return address. + MCSymbol *RetTarget = CreateTempLabel(Context, "DirectCallRetAddr"); + const MCExpr *RetTargetExpr = MCSymbolRefExpr::Create(RetTarget, Context); + + // push return_addr + MCInst PUSHInst; + PUSHInst.setOpcode(X86::PUSH64i32); + PUSHInst.addOperand(MCOperand::CreateExpr(RetTargetExpr)); + Out.EmitInstruction(PUSHInst); + + // jmp target + MCInst JMPInst; + JMPInst.setOpcode(X86::JMP_4); + JMPInst.addOperand(Op); + Out.EmitInstruction(JMPInst); - MCInst CALLInst; - CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32); - CALLInst.addOperand(Op); - Out.EmitInstruction(CALLInst); - Out.EmitBundleUnlock(); + Out.EmitCodeAlignment(kNaClX86InstructionBundleSize); + Out.EmitLabel(RetTarget); + } else { + Out.EmitBundleLock(true); + + MCInst CALLInst; + CALLInst.setOpcode(Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32); + CALLInst.addOperand(Op); + Out.EmitInstruction(CALLInst); + Out.EmitBundleUnlock(); + } } static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall, MCStreamer &Out) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const bool HideSandboxBase = (FlagHideSandboxBase && + Is64Bit && !FlagUseZeroBasedSandbox); const int JmpMask = FlagSfiX86JmpMask; - const unsigned Reg32 = Op.getReg(); + unsigned Reg32 = Op.getReg(); + + // For NaCl64, the sequence + // jmp *%rXX + // is changed to + // mov %rXX,%r11d + // and $0xffffffe0,%r11d + // add %r15,%r11 + // jmpq *%r11 + // + // And the sequence + // call *%rXX + // return_addr: + // is changed to + // mov %rXX,%r11d + // push return_addr + // and $0xffffffe0,%r11d + // add %r15,%r11 + // jmpq *%r11 + // .align 32 + // return_addr: + // + // This avoids exposing the sandbox base address via the return + // address on the stack. + + // For NaCl64, force an assignment of the branch target into r11, + // and subsequently use r11 as the ultimate branch target, so that + // only r11 (which will never be written to memory) exposes the + // sandbox base address. But avoid a redundant assignment if the + // original branch target is already r11 or r11d. + const unsigned SafeReg32 = X86::R11D; + const unsigned SafeReg64 = X86::R11; + if (HideSandboxBase) { + // In some cases, EmitIndirectBranch() is called with a 32-bit + // register Op (e.g. r11d), and in other cases a 64-bit register + // (e.g. r11), so we need to test both variants to avoid a + // redundant assignment. TODO(stichnot): Make callers consistent + // on 32 vs 64 bit register. + if ((Reg32 != SafeReg32) && (Reg32 != SafeReg64)) { + MCInst MOVInst; + MOVInst.setOpcode(X86::MOV32rr); + MOVInst.addOperand(MCOperand::CreateReg(SafeReg32)); + MOVInst.addOperand(MCOperand::CreateReg(Reg32)); + Out.EmitInstruction(MOVInst); + Reg32 = SafeReg32; + } + } const unsigned Reg64 = getX86SubSuperRegister_(Reg32, MVT::i64); - Out.EmitBundleLock(IsCall); + // Explicitly push the (32-bit) return address for a NaCl64 call + // instruction. + MCSymbol *RetTarget = NULL; + if (IsCall && HideSandboxBase) { + MCContext &Context = Out.getContext(); + + // Generate a label for the return address. + RetTarget = CreateTempLabel(Context, "IndirectCallRetAddr"); + const MCExpr *RetTargetExpr = MCSymbolRefExpr::Create(RetTarget, Context); + + // push return_addr + MCInst PUSHInst; + PUSHInst.setOpcode(X86::PUSH64i32); + PUSHInst.addOperand(MCOperand::CreateExpr(RetTargetExpr)); + Out.EmitInstruction(PUSHInst); + } + + const bool WillEmitCallInst = IsCall && !HideSandboxBase; + Out.EmitBundleLock(WillEmitCallInst); MCInst ANDInst; ANDInst.setOpcode(X86::AND32ri8); @@ -71,7 +192,7 @@ static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall, ANDInst.addOperand(MCOperand::CreateImm(JmpMask)); Out.EmitInstruction(ANDInst); - if (Is64Bit && !UseZeroBasedSandbox) { + if (Is64Bit && !FlagUseZeroBasedSandbox) { MCInst InstADD; InstADD.setOpcode(X86::ADD64rr); InstADD.addOperand(MCOperand::CreateReg(Reg64)); @@ -80,24 +201,40 @@ static void EmitIndirectBranch(const MCOperand &Op, bool Is64Bit, bool IsCall, Out.EmitInstruction(InstADD); } - if (IsCall) { + if (WillEmitCallInst) { + // callq *%rXX MCInst CALLInst; CALLInst.setOpcode(Is64Bit ? X86::CALL64r : X86::CALL32r); CALLInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32)); Out.EmitInstruction(CALLInst); } else { + // jmpq *%rXX -or- jmpq *%r11 MCInst JMPInst; JMPInst.setOpcode(Is64Bit ? X86::JMP64r : X86::JMP32r); JMPInst.addOperand(MCOperand::CreateReg(Is64Bit ? Reg64 : Reg32)); Out.EmitInstruction(JMPInst); } Out.EmitBundleUnlock(); + if (RetTarget) { + Out.EmitCodeAlignment(kNaClX86InstructionBundleSize); + Out.EmitLabel(RetTarget); + } } static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) { + // For NaCl64 returns, follow the convention of using r11 to hold + // the target of an indirect jump to avoid potentially leaking the + // sandbox base address. + const bool HideSandboxBase = (FlagHideSandboxBase && + Is64Bit && !FlagUseZeroBasedSandbox); + // For NaCl64 sandbox hiding, use r11 to hold the branch target. + // Otherwise, use rcx/ecx for fewer instruction bytes (no REX + // prefix). + const unsigned RegTarget = HideSandboxBase ? X86::R11 : + (Is64Bit ? X86::RCX : X86::ECX); MCInst POPInst; POPInst.setOpcode(Is64Bit ? X86::POP64r : X86::POP32r); - POPInst.addOperand(MCOperand::CreateReg(Is64Bit ? X86::RCX : X86::ECX)); + POPInst.addOperand(MCOperand::CreateReg(RegTarget)); Out.EmitInstruction(POPInst); if (AmtOp) { @@ -113,7 +250,7 @@ static void EmitRet(const MCOperand *AmtOp, bool Is64Bit, MCStreamer &Out) { MCInst JMPInst; JMPInst.setOpcode(Is64Bit ? X86::NACL_JMP64r : X86::NACL_JMP32r); - JMPInst.addOperand(MCOperand::CreateReg(X86::ECX)); + JMPInst.addOperand(MCOperand::CreateReg(RegTarget)); Out.EmitInstruction(JMPInst); } @@ -121,8 +258,7 @@ static void EmitTrap(bool Is64Bit, MCStreamer &Out) { // Rewrite to: // X86-32: mov $0, 0 // X86-64: mov $0, (%r15) - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; - unsigned BaseReg = Is64Bit && !UseZeroBasedSandbox ? X86::R15 : 0; + unsigned BaseReg = Is64Bit && !FlagUseZeroBasedSandbox ? X86::R15 : 0; MCInst Tmp; Tmp.setOpcode(X86::MOV32mi); @@ -140,8 +276,7 @@ static void EmitTrap(bool Is64Bit, MCStreamer &Out) { static void EmitRegFix(unsigned Reg64, MCStreamer &Out) { // lea (%rsp, %r15, 1), %rsp // We do not need to add the R15 base for the zero-based sandbox model - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; - if (!UseZeroBasedSandbox) { + if (!FlagUseZeroBasedSandbox) { MCInst Tmp; Tmp.setOpcode(X86::LEA64r); Tmp.addOperand(MCOperand::CreateReg(Reg64)); // DestReg @@ -215,9 +350,8 @@ static void EmitRegTruncate(unsigned Reg64, MCStreamer &Out) { static void HandleMemoryRefTruncation(MCInst *Inst, unsigned IndexOpPosition, MCStreamer &Out) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; unsigned IndexReg = Inst->getOperand(IndexOpPosition).getReg(); - if (UseZeroBasedSandbox) { + if (FlagUseZeroBasedSandbox) { // With the zero-based sandbox, we use a 32-bit register on the index Inst->getOperand(IndexOpPosition).setReg(DemoteRegTo32_(IndexReg)); } else { @@ -352,7 +486,6 @@ namespace llvm { // these instead of combined instructions. At this time, having only // one explicit prefix is supported. bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; // If we are emitting to .s, just emit all pseudo-instructions directly. if (Out.hasRawTextSupport()) { return false; @@ -473,7 +606,7 @@ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) { unsigned PrefixLocal = PrefixSaved; PrefixSaved = 0; - if (PrefixLocal || !UseZeroBasedSandbox) + if (PrefixLocal || !FlagUseZeroBasedSandbox) Out.EmitBundleLock(false); HandleMemoryRefTruncation(&SandboxedInst, IndexOpPosition, Out); @@ -483,7 +616,7 @@ bool CustomExpandInstNaClX86(const MCInst &Inst, MCStreamer &Out) { EmitPrefix(PrefixLocal, Out); Out.EmitInstruction(SandboxedInst); - if (PrefixLocal || !UseZeroBasedSandbox) + if (PrefixLocal || !FlagUseZeroBasedSandbox) Out.EmitBundleUnlock(); return true; } diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index b024817891..89485cb06c 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -15,6 +15,7 @@ #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" +#include "X86NaClDecls.h" // @LOCALMOD #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/SmallSet.h" @@ -756,8 +757,52 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MFI->setOffsetAdjustment(-NumBytes); // Save EBP/RBP into the appropriate stack slot. + // @LOCALMOD-BEGIN + unsigned RegToPush = FramePtr; + const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); + const bool HideSandboxBase = (FlagHideSandboxBase && + Subtarget->isTargetNaCl64() && + !FlagUseZeroBasedSandbox); + if (HideSandboxBase) { + // Hide the sandbox base address by masking off the upper 32 + // bits of the pushed/saved RBP on the stack, using: + // mov %ebp, %r10d + // push %r10 + // instead of: + // push %rbp + // Additionally, we can use rax instead of r10 when it is not a + // varargs function and therefore rax is available, saving one + // byte of REX prefix per instruction. + // Note that the epilog already adds R15 when restoring RBP. + + // mov %ebp, %r10d + unsigned RegToPushLower; + if (Fn->isVarArg()) { + // Note: This use of r10 in the prolog can't be used with the + // gcc "nest" attribute, due to its use of r10. Example: + // target triple = "x86_64-pc-linux-gnu" + // define i64 @func(i64 nest %arg) { + // ret i64 %arg + // } + // + // $ clang -m64 llvm_nest_attr.ll -S -o - + // ... + // func: + // movq %r10, %rax + // ret + RegToPush = X86::R10; + RegToPushLower = X86::R10D; + } else { + RegToPush = X86::RAX; + RegToPushLower = X86::EAX; + } + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rr), RegToPushLower) + .addReg(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } + // @LOCALMOD-END BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) - .addReg(FramePtr, RegState::Kill) + .addReg(RegToPush, RegState::Kill) // @LOCALMOD .setMIFlag(MachineInstr::FrameSetup); if (needsFrameMoves) { diff --git a/lib/Target/X86/X86NaClDecls.h b/lib/Target/X86/X86NaClDecls.h new file mode 100644 index 0000000000..4050187c68 --- /dev/null +++ b/lib/Target/X86/X86NaClDecls.h @@ -0,0 +1,28 @@ +//===-- X86NaClDecls.h - Common X86 NaCl declarations -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides various NaCl-related declarations for the X86-32 +// and X86-64 architectures. +// +//===----------------------------------------------------------------------===// + +#ifndef X86NACLDECLS_H +#define X86NACLDECLS_H + +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +extern const int kNaClX86InstructionBundleSize; + +extern cl::opt<bool> FlagRestrictR15; +extern cl::opt<bool> FlagUseZeroBasedSandbox; +extern cl::opt<bool> FlagHideSandboxBase; + +#endif // X86NACLDECLS_H diff --git a/lib/Target/X86/X86NaClRewritePass.cpp b/lib/Target/X86/X86NaClRewritePass.cpp index 846c72f452..8a131029e2 100644 --- a/lib/Target/X86/X86NaClRewritePass.cpp +++ b/lib/Target/X86/X86NaClRewritePass.cpp @@ -19,6 +19,7 @@ #include "X86.h" #include "X86InstrInfo.h" +#include "X86NaClDecls.h" #include "X86Subtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -32,7 +33,6 @@ using namespace llvm; -extern cl::opt<bool> FlagUseZeroBasedSandbox; cl::opt<bool> FlagRestrictR15("sfi-restrict-r15", cl::desc("Restrict use of %r15. This flag can" " be turned off for the zero-based" @@ -142,9 +142,8 @@ static bool IsDirectBranch(const MachineInstr &MI) { } static bool IsRegAbsolute(unsigned Reg) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; const bool RestrictR15 = FlagRestrictR15; - assert(UseZeroBasedSandbox || RestrictR15); + assert(FlagUseZeroBasedSandbox || RestrictR15); return (Reg == X86::RSP || Reg == X86::RBP || (Reg == X86::R15 && RestrictR15)); } @@ -219,7 +218,6 @@ X86NaClRewritePass::TraceLog(const char *func, bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; TraceLog("ApplyStackSFI", MBB, MBBI); assert(Is64Bit); MachineInstr &MI = *MBBI; @@ -249,7 +247,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, if (NewOpc) { BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addImm(MI.getOperand(2).getImm()) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); MI.eraseFromParent(); return true; } @@ -288,7 +286,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, const MachineOperand &Offset = MI.getOperand(4); BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_SPADJi32)) .addImm(Offset.getImm()) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); MI.eraseFromParent(); return true; } @@ -296,7 +294,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, if (Opc == X86::MOV32rr || Opc == X86::MOV64rr) { BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTSPr)) .addReg(DemoteRegTo32(MI.getOperand(1).getReg())) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); MI.eraseFromParent(); return true; } @@ -308,7 +306,7 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, .addOperand(MI.getOperand(3)) // Index .addOperand(MI.getOperand(4)) // Offset .addOperand(MI.getOperand(5)) // Segment - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); MI.eraseFromParent(); return true; } @@ -319,7 +317,6 @@ bool X86NaClRewritePass::ApplyStackSFI(MachineBasicBlock &MBB, bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; TraceLog("ApplyFrameSFI", MBB, MBBI); assert(Is64Bit); MachineInstr &MI = *MBBI; @@ -343,7 +340,7 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, // To: naclrestbp %eX, %rZP BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_RESTBPr)) .addReg(DemoteRegTo32(SrcReg)) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); // rZP MI.eraseFromParent(); return true; } @@ -353,7 +350,7 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, assert(MI.getOperand(0).getReg() == X86::RBP); // Zero-based sandbox model uses address clipping - if (UseZeroBasedSandbox) + if (FlagUseZeroBasedSandbox) return false; // Rewrite: mov %rbp, (...) @@ -364,7 +361,7 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, .addOperand(MI.getOperand(3)) // Index .addOperand(MI.getOperand(4)) // Offset .addOperand(MI.getOperand(5)) // Segment - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); // rZP MI.eraseFromParent(); return true; } @@ -389,11 +386,11 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, .addReg(0) // Index .addImm(0) // Offset .addReg(0) // Segment - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); // rZP + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); // rZP BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi8)) .addImm(8) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); MI.eraseFromParent(); return true; @@ -405,7 +402,8 @@ bool X86NaClRewritePass::ApplyFrameSFI(MachineBasicBlock &MBB, bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; + const bool HideSandboxBase = (FlagHideSandboxBase && + Is64Bit && !FlagUseZeroBasedSandbox); TraceLog("ApplyControlSFI", MBB, MBBI); MachineInstr &MI = *MBBI; @@ -436,7 +434,7 @@ bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) .addOperand(MI.getOperand(0)); if (Is64Bit) { - NewMI.addReg(UseZeroBasedSandbox ? 0 : X86::R15); + NewMI.addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); } MI.eraseFromParent(); return true; @@ -451,25 +449,32 @@ bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB, Opc == X86::RETI) { // To maintain compatibility with nacl-as, for now we don't emit naclret. // MI.setDesc(TII->get(Is64Bit ? X86::NACL_RET64 : X86::NACL_RET32)); + // + // For NaCl64 returns, follow the convention of using r11 to hold + // the target of an indirect jump to avoid potentially leaking the + // sandbox base address. + unsigned RegTarget; if (Is64Bit) { - BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), X86::RCX); + RegTarget = (HideSandboxBase ? X86::R11 : X86::RCX); + BuildMI(MBB, MBBI, DL, TII->get(X86::POP64r), RegTarget); if (Opc == X86::RETI) { BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_ASPi32)) .addOperand(MI.getOperand(0)) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); } BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP64r)) - .addReg(X86::ECX) - .addReg(UseZeroBasedSandbox ? 0 : X86::R15); + .addReg(RegTarget) + .addReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); } else { - BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), X86::ECX); + RegTarget = X86::ECX; + BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r), RegTarget); if (Opc == X86::RETI) { BuildMI(MBB, MBBI, DL, TII->get(X86::ADD32ri), X86::ESP) .addReg(X86::ESP) .addOperand(MI.getOperand(0)); } BuildMI(MBB, MBBI, DL, TII->get(X86::NACL_JMP32r)) - .addReg(X86::ECX); + .addReg(RegTarget); } MI.eraseFromParent(); return true; @@ -480,7 +485,7 @@ bool X86NaClRewritePass::ApplyControlSFI(MachineBasicBlock &MBB, // To maintain compatibility with nacl-as, for now we don't emit nacltrap. // MI.setDesc(TII->get(Is64Bit ? X86::NACL_TRAP64 : X86::NACL_TRAP32)); BuildMI(MBB, MBBI, DL, TII->get(X86::MOV32mi)) - .addReg(Is64Bit && !UseZeroBasedSandbox ? X86::R15 : 0) // Base + .addReg(Is64Bit && !FlagUseZeroBasedSandbox ? X86::R15 : 0) // Base .addImm(1) // Scale .addReg(0) // Index .addImm(0) // Offset @@ -502,7 +507,6 @@ bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB, TraceLog("ApplyMemorySFI", MBB, MBBI); assert(Is64Bit); MachineInstr &MI = *MBBI; - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; if (!IsLoad(MI) && !IsStore(MI)) return false; @@ -545,9 +549,9 @@ bool X86NaClRewritePass::ApplyMemorySFI(MachineBasicBlock &MBB, } else { if (!BaseReg.getReg()) { // No base, fill in relative. - BaseReg.setReg(UseZeroBasedSandbox ? 0 : X86::R15); + BaseReg.setReg(FlagUseZeroBasedSandbox ? 0 : X86::R15); AddrReg = IndexReg.getReg(); - } else if (!UseZeroBasedSandbox) { + } else if (!FlagUseZeroBasedSandbox) { // Switch base and index registers if index register is undefined. // That is do conversions like "mov d(%r,0,0) -> mov d(%r15, %r, 1)". assert (!IndexReg.getReg() diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index bab08b69df..67bac130b5 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -17,6 +17,7 @@ #include "X86.h" #include "X86InstrBuilder.h" #include "X86MachineFunctionInfo.h" +#include "X86NaClDecls.h" // @LOCALMOD #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/BitVector.h" @@ -54,11 +55,6 @@ static cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); -// @LOCALMOD-BEGIN -extern cl::opt<bool> FlagUseZeroBasedSandbox; -extern cl::opt<bool> FlagRestrictR15; -// @LOCALMOD-END - X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii) : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit() @@ -394,9 +390,8 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { // @LOCALMOD-START const X86Subtarget& Subtarget = MF.getTarget().getSubtarget<X86Subtarget>(); - const bool UseZeroBasedSandbox = FlagUseZeroBasedSandbox; const bool RestrictR15 = FlagRestrictR15; - assert(UseZeroBasedSandbox || RestrictR15); + assert(FlagUseZeroBasedSandbox || RestrictR15); if (Subtarget.isTargetNaCl64()) { if (RestrictR15) { Reserved.set(X86::R15); @@ -408,6 +403,16 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::EBP); Reserved.set(X86::BP); Reserved.set(X86::BPL); + const bool RestrictR11 = FlagHideSandboxBase && !FlagUseZeroBasedSandbox; + if (RestrictR11) { + // Restrict r11 so that it can be used for indirect jump + // sequences that don't leak the sandbox base address onto the + // stack. + Reserved.set(X86::R11); + Reserved.set(X86::R11D); + Reserved.set(X86::R11W); + Reserved.set(X86::R11B); + } } // @LOCALMOD-END diff --git a/test/CodeGen/X86/fast-isel-x86-64.ll b/test/CodeGen/X86/fast-isel-x86-64.ll index acfa64582c..ae1998cd54 100644 --- a/test/CodeGen/X86/fast-isel-x86-64.ll +++ b/test/CodeGen/X86/fast-isel-x86-64.ll @@ -55,8 +55,8 @@ define i32 @test3_nacl64() nounwind { ; NACL64_PIC: test3_nacl64: ; NACL64_PIC: movl G@GOTPCREL(%rip), %eax -; NACL64_PIC-NEXT: popq %rcx -; NACL64_PIC-NEXT: nacljmp %ecx, %r15 +; NACL64_PIC-NEXT: popq %r11 +; NACL64_PIC-NEXT: nacljmp %r11, %r15 } @@ -316,8 +316,8 @@ define void @test23(i8* noalias sret %result) { ; NACL64: test23: ; NACL64: call ; NACL64: movl %edi, %eax -; NACL64: popq %rcx -; NACL64: nacljmp %ecx, %r15 +; NACL64: popq %r11 +; NACL64: nacljmp %r11, %r15 } declare i8* @foo23() diff --git a/test/NaCl/X86/pnacl-avoids-r11-x86-64.c b/test/NaCl/X86/pnacl-avoids-r11-x86-64.c new file mode 100644 index 0000000000..65b921486f --- /dev/null +++ b/test/NaCl/X86/pnacl-avoids-r11-x86-64.c @@ -0,0 +1,79 @@ +/* + Object file built using: + pnacl-clang -S -O2 -emit-llvm -o pnacl-avoids-r11-x86-64.ll \ + pnacl-avoids-r11-x86-64.c + Then the comments below should be pasted into the .ll file, + replacing "RUNxxx" with "RUN". + +; The NACLON test verifies that %r11 and %r11d are not used except as +; part of the return sequence. +; +; RUNxxx: pnacl-llc -O2 -mtriple=x86_64-none-nacl < %s | \ +; RUNxxx: FileCheck %s --check-prefix=NACLON +; +; The NACLOFF test verifies that %r11 would normally be used if PNaCl +; weren't reserving r11 for its own uses, to be sure NACLON is a +; valid test. +; +; RUNxxx: pnacl-llc -O2 -mtriple=x86_64-linux < %s | \ +; RUNxxx: FileCheck %s --check-prefix=NACLOFF +; +; NACLON: RegisterPressure: +; NACLON-NOT: %r11 +; NACLON: popq %r11 +; NACLON: nacljmp %r11, %r15 +; +; NACLOFF: RegisterPressure: +; NACLOFF: %r11 +; NACLOFF: ret + +*/ + +// Function RegisterPressure() tries to induce maximal integer +// register pressure in a ~16 register machine, for both scratch and +// preserved registers. Repeated calls to Use() are designed to +// use all the preserved registers. The calculations on the local +// variables between function calls are designed to use all the +// scratch registers. + +void RegisterPressure(void) +{ + extern void Use(int, int, int, int, int, int, int, int, + int, int, int, int, int, int, int, int); + extern int GetValue(void); + extern volatile int v1a, v1b, v2a, v2b, v3a, v3b, v4a, v4b; + + int i00 = GetValue(); + int i01 = GetValue(); + int i02 = GetValue(); + int i03 = GetValue(); + int i04 = GetValue(); + int i05 = GetValue(); + int i06 = GetValue(); + int i07 = GetValue(); + int i08 = GetValue(); + int i09 = GetValue(); + int i10 = GetValue(); + int i11 = GetValue(); + int i12 = GetValue(); + int i13 = GetValue(); + int i14 = GetValue(); + int i15 = GetValue(); + + Use(i00, i01, i02, i03, i04, i05, i06, i07, + i08, i09, i10, i11, i12, i13, i14, i15); + Use(i00, i01, i02, i03, i04, i05, i06, i07, + i08, i09, i10, i11, i12, i13, i14, i15); + v1a = i00 + i01 + i02 + i03 + i04 + i05 + i06 + i07; + v1b = i08 + i09 + i10 + i11 + i12 + i13 + i14 + i15; + v2a = i00 + i01 + i02 + i03 + i08 + i09 + i10 + i11; + v2b = i04 + i05 + i06 + i07 + i12 + i13 + i14 + i15; + v3a = i00 + i01 + i04 + i05 + i08 + i09 + i12 + i13; + v3b = i02 + i03 + i06 + i07 + i10 + i11 + i14 + i15; + v4a = i00 + i02 + i04 + i06 + i08 + i10 + i12 + i14; + v4b = i01 + i03 + i05 + i07 + i09 + i11 + i13 + i15; + Use(i00, i01, i02, i03, i04, i05, i06, i07, + i08, i09, i10, i11, i12, i13, i14, i15); + Use(i00, i01, i02, i03, i04, i05, i06, i07, + i08, i09, i10, i11, i12, i13, i14, i15); +} diff --git a/test/NaCl/X86/pnacl-avoids-r11-x86-64.ll b/test/NaCl/X86/pnacl-avoids-r11-x86-64.ll new file mode 100644 index 0000000000..04cb4bfd3c --- /dev/null +++ b/test/NaCl/X86/pnacl-avoids-r11-x86-64.ll @@ -0,0 +1,131 @@ +; The NACLON test verifies that %r11 and %r11d are not used except as +; part of the return sequence. +; +; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl < %s | \ +; RUN: FileCheck %s --check-prefix=NACLON +; +; The NACLOFF test verifies that %r11 would normally be used if PNaCl +; weren't reserving r11 for its own uses, to be sure NACLON is a +; valid test. +; +; RUN: pnacl-llc -O2 -mtriple=x86_64-linux < %s | \ +; RUN: FileCheck %s --check-prefix=NACLOFF +; +; NACLON: RegisterPressure: +; NACLON-NOT: %r11 +; NACLON: popq %r11 +; NACLON: nacljmp %r11, %r15 +; +; NACLOFF: RegisterPressure: +; NACLOFF: %r11 +; NACLOFF: ret +; ModuleID = 'pnacl-avoids-r11-x86-64.c' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" +target triple = "le32-unknown-nacl" + +@v1a = external global i32 +@v1b = external global i32 +@v2a = external global i32 +@v2b = external global i32 +@v3a = external global i32 +@v3b = external global i32 +@v4a = external global i32 +@v4b = external global i32 + +; Function Attrs: nounwind +define void @RegisterPressure() #0 { +entry: + %call = tail call i32 @GetValue() #2 + %call1 = tail call i32 @GetValue() #2 + %call2 = tail call i32 @GetValue() #2 + %call3 = tail call i32 @GetValue() #2 + %call4 = tail call i32 @GetValue() #2 + %call5 = tail call i32 @GetValue() #2 + %call6 = tail call i32 @GetValue() #2 + %call7 = tail call i32 @GetValue() #2 + %call8 = tail call i32 @GetValue() #2 + %call9 = tail call i32 @GetValue() #2 + %call10 = tail call i32 @GetValue() #2 + %call11 = tail call i32 @GetValue() #2 + %call12 = tail call i32 @GetValue() #2 + %call13 = tail call i32 @GetValue() #2 + %call14 = tail call i32 @GetValue() #2 + %call15 = tail call i32 @GetValue() #2 + tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2 + tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2 + %add = add nsw i32 %call1, %call + %add16 = add nsw i32 %add, %call2 + %add17 = add nsw i32 %add16, %call3 + %add18 = add nsw i32 %add17, %call4 + %add19 = add nsw i32 %add18, %call5 + %add20 = add nsw i32 %add19, %call6 + %add21 = add nsw i32 %add20, %call7 + store volatile i32 %add21, i32* @v1a, align 4, !tbaa !0 + %add22 = add nsw i32 %call9, %call8 + %add23 = add nsw i32 %add22, %call10 + %add24 = add nsw i32 %add23, %call11 + %add25 = add nsw i32 %add24, %call12 + %add26 = add nsw i32 %add25, %call13 + %add27 = add nsw i32 %add26, %call14 + %add28 = add nsw i32 %add27, %call15 + store volatile i32 %add28, i32* @v1b, align 4, !tbaa !0 + %add32 = add nsw i32 %call8, %add17 + %add33 = add nsw i32 %add32, %call9 + %add34 = add nsw i32 %add33, %call10 + %add35 = add nsw i32 %add34, %call11 + store volatile i32 %add35, i32* @v2a, align 4, !tbaa !0 + %add36 = add nsw i32 %call5, %call4 + %add37 = add nsw i32 %add36, %call6 + %add38 = add nsw i32 %add37, %call7 + %add39 = add nsw i32 %add38, %call12 + %add40 = add nsw i32 %add39, %call13 + %add41 = add nsw i32 %add40, %call14 + %add42 = add nsw i32 %add41, %call15 + store volatile i32 %add42, i32* @v2b, align 4, !tbaa !0 + %add44 = add nsw i32 %call4, %add + %add45 = add nsw i32 %add44, %call5 + %add46 = add nsw i32 %add45, %call8 + %add47 = add nsw i32 %add46, %call9 + %add48 = add nsw i32 %add47, %call12 + %add49 = add nsw i32 %add48, %call13 + store volatile i32 %add49, i32* @v3a, align 4, !tbaa !0 + %add50 = add nsw i32 %call3, %call2 + %add51 = add nsw i32 %add50, %call6 + %add52 = add nsw i32 %add51, %call7 + %add53 = add nsw i32 %add52, %call10 + %add54 = add nsw i32 %add53, %call11 + %add55 = add nsw i32 %add54, %call14 + %add56 = add nsw i32 %add55, %call15 + store volatile i32 %add56, i32* @v3b, align 4, !tbaa !0 + %add57 = add nsw i32 %call2, %call + %add58 = add nsw i32 %add57, %call4 + %add59 = add nsw i32 %add58, %call6 + %add60 = add nsw i32 %add59, %call8 + %add61 = add nsw i32 %add60, %call10 + %add62 = add nsw i32 %add61, %call12 + %add63 = add nsw i32 %add62, %call14 + store volatile i32 %add63, i32* @v4a, align 4, !tbaa !0 + %add64 = add nsw i32 %call3, %call1 + %add65 = add nsw i32 %add64, %call5 + %add66 = add nsw i32 %add65, %call7 + %add67 = add nsw i32 %add66, %call9 + %add68 = add nsw i32 %add67, %call11 + %add69 = add nsw i32 %add68, %call13 + %add70 = add nsw i32 %add69, %call15 + store volatile i32 %add70, i32* @v4b, align 4, !tbaa !0 + tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2 + tail call void @Use(i32 %call, i32 %call1, i32 %call2, i32 %call3, i32 %call4, i32 %call5, i32 %call6, i32 %call7, i32 %call8, i32 %call9, i32 %call10, i32 %call11, i32 %call12, i32 %call13, i32 %call14, i32 %call15) #2 + ret void +} + +declare i32 @GetValue() #1 + +declare void @Use(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} diff --git a/test/NaCl/X86/pnacl-hides-sandbox-x86-64.c b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.c new file mode 100644 index 0000000000..b1fc9ce83d --- /dev/null +++ b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.c @@ -0,0 +1,120 @@ +/* + Object file built using: + pnacl-clang -S -O0 -emit-llvm -o pnacl-hides-sandbox-x86-64.ll \ + pnacl-hides-sandbox-x86-64.c + Then the comments below should be pasted into the .ll file. + +; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \ +; RUN: llvm-objdump -d -r - | FileCheck %s +; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \ +; RUN: llvm-objdump -d -r - | FileCheck %s --check-prefix=NOCALLRET +; +; CHECK: TestDirectCall: +; Push the immediate return address +; CHECK: pushq $0 +; CHECK-NEXT: .text +; Immediate jump to the target +; CHECK: jmpq 0 +; CHECK-NEXT: DirectCallTarget +; Return label +; CHECK: DirectCallRetAddr +; +; CHECK: TestIndirectCall: +; Push the immediate return address +; CHECK: pushq $0 +; CHECK-NEXT: .text +; Fixed sequence for indirect jump +; CHECK: andl $-32, %r11d +; CHECK-NEXT: addq %r15, %r11 +; CHECK-NEXT: jmpq *%r11 +; Return label +; CHECK: IndirectCallRetAddr +; +; Verify that the old frame pointer isn't leaked when saved +; CHECK: TestMaskedFramePointer: +; CHECK: movl %ebp, %eax +; CHECK: pushq %rax +; CHECK: movq %rsp, %rbp +; +; Verify use of r10 instead of rax in the presence of varargs, +; when saving the old rbp. +; CHECK: TestMaskedFramePointerVarargs: +; CHECK: movl %ebp, %r10d +; CHECK: pushq %r10 +; CHECK: movq %rsp, %rbp +; +; Test the indirect jump sequence derived from a "switch" statement. +; CHECK: TestIndirectJump: +; CHECK: andl $-32, %r11d +; CHECK-NEXT: addq %r15, %r11 +; CHECK-NEXT: jmpq *%r11 +; At least 4 "jmp"s due to 5 switch cases +; CHECK: jmp +; CHECK: jmp +; CHECK: jmp +; CHECK: jmp +; At least 1 direct call to puts() +; CHECK: pushq $0 +; CHECK-NEXT: .text +; CHECK: jmpq 0 +; CHECK-NEXT: puts +; +; Return sequence is just the indirect jump sequence +; CHECK: TestReturn: +; CHECK: andl $-32, %r11d +; CHECK-NEXT: addq %r15, %r11 +; CHECK-NEXT: jmpq *%r11 +; +; Special test that no "call" or "ret" instructions are generated. +; NOCALLRET-NOT: call +; NOCALLRET-NOT: ret +*/ + +#include <stdlib.h> +#include <stdio.h> + +void TestDirectCall(void) { + extern void DirectCallTarget(void); + DirectCallTarget(); +} + +void TestIndirectCall(void) { + extern void (*IndirectCallTarget)(void); + IndirectCallTarget(); +} + +void TestMaskedFramePointer(int Arg) { + extern void Consume(void *); + // Calling alloca() is one way to force the rbp frame pointer. + void *Tmp = alloca(Arg); + Consume(Tmp); +} + +void TestMaskedFramePointerVarargs(int Arg, ...) { + extern void Consume(void *); + void *Tmp = alloca(Arg); + Consume(Tmp); +} + +void TestIndirectJump(int Arg) { + switch (Arg) { + case 2: + puts("Prime 1"); + break; + case 3: + puts("Prime 2"); + break; + case 5: + puts("Prime 3"); + break; + case 7: + puts("Prime 4"); + break; + case 11: + puts("Prime 5"); + break; + } +} + +void TestReturn(void) { +} diff --git a/test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll new file mode 100644 index 0000000000..e64798f66b --- /dev/null +++ b/test/NaCl/X86/pnacl-hides-sandbox-x86-64.ll @@ -0,0 +1,161 @@ +; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \ +; RUN: llvm-objdump -d -r - | FileCheck %s +; RUN: pnacl-llc -O2 -mtriple=x86_64-none-nacl -filetype=obj < %s | \ +; RUN: llvm-objdump -d -r - | FileCheck %s --check-prefix=NOCALLRET + +; ModuleID = 'pnacl-hides-sandbox-x86-64.c' +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" +target triple = "le32-unknown-nacl" + +@IndirectCallTarget = external global void ()* +@.str = private unnamed_addr constant [8 x i8] c"Prime 1\00", align 1 +@.str1 = private unnamed_addr constant [8 x i8] c"Prime 2\00", align 1 +@.str2 = private unnamed_addr constant [8 x i8] c"Prime 3\00", align 1 +@.str3 = private unnamed_addr constant [8 x i8] c"Prime 4\00", align 1 +@.str4 = private unnamed_addr constant [8 x i8] c"Prime 5\00", align 1 + +; Function Attrs: nounwind +define void @TestDirectCall() #0 { +entry: + call void @DirectCallTarget() + ret void +} +; CHECK: TestDirectCall: +; Push the immediate return address +; CHECK: pushq $0 +; CHECK-NEXT: .text +; Immediate jump to the target +; CHECK: jmpq 0 +; CHECK-NEXT: DirectCallTarget + +declare void @DirectCallTarget() #1 + +; Function Attrs: nounwind +define void @TestIndirectCall() #0 { +entry: + %0 = load void ()** @IndirectCallTarget, align 4 + call void %0() + ret void +} +; CHECK: TestIndirectCall: +; Push the immediate return address +; CHECK: pushq $0 +; CHECK-NEXT: .text +; Fixed sequence for indirect jump +; CHECK: andl $-32, %r11d +; CHECK-NEXT: addq %r15, %r11 +; CHECK-NEXT: jmpq *%r11 + +; Function Attrs: nounwind +define void @TestMaskedFramePointer(i32 %Arg) #0 { +entry: + %Arg.addr = alloca i32, align 4 + %Tmp = alloca i8*, align 4 + store i32 %Arg, i32* %Arg.addr, align 4 + %0 = load i32* %Arg.addr, align 4 + %1 = alloca i8, i32 %0 + store i8* %1, i8** %Tmp, align 4 + %2 = load i8** %Tmp, align 4 + call void @Consume(i8* %2) + ret void +} +; Verify that the old frame pointer isn't leaked when saved +; CHECK: TestMaskedFramePointer: +; CHECK: movl %ebp, %eax +; CHECK: pushq %rax +; CHECK: movq %rsp, %rbp + +declare void @Consume(i8*) #1 + +; Function Attrs: nounwind +define void @TestMaskedFramePointerVarargs(i32 %Arg, ...) #0 { +entry: + %Arg.addr = alloca i32, align 4 + %Tmp = alloca i8*, align 4 + store i32 %Arg, i32* %Arg.addr, align 4 + %0 = load i32* %Arg.addr, align 4 + %1 = alloca i8, i32 %0 + store i8* %1, i8** %Tmp, align 4 + %2 = load i8** %Tmp, align 4 + call void @Consume(i8* %2) + ret void +} +; Verify use of r10 instead of rax in the presence of varargs, +; when saving the old rbp. +; CHECK: TestMaskedFramePointerVarargs: +; CHECK: movl %ebp, %r10d +; CHECK: pushq %r10 +; CHECK: movq %rsp, %rbp + +; Function Attrs: nounwind +define void @TestIndirectJump(i32 %Arg) #0 { +entry: + %Arg.addr = alloca i32, align 4 + store i32 %Arg, i32* %Arg.addr, align 4 + %0 = load i32* %Arg.addr, align 4 + switch i32 %0, label %sw.epilog [ + i32 2, label %sw.bb + i32 3, label %sw.bb1 + i32 5, label %sw.bb3 + i32 7, label %sw.bb5 + i32 11, label %sw.bb7 + ] + +sw.bb: ; preds = %entry + %call = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0)) + br label %sw.epilog + +sw.bb1: ; preds = %entry + %call2 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str1, i32 0, i32 0)) + br label %sw.epilog + +sw.bb3: ; preds = %entry + %call4 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str2, i32 0, i32 0)) + br label %sw.epilog + +sw.bb5: ; preds = %entry + %call6 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str3, i32 0, i32 0)) + br label %sw.epilog + +sw.bb7: ; preds = %entry + %call8 = call i32 @puts(i8* getelementptr inbounds ([8 x i8]* @.str4, i32 0, i32 0)) + br label %sw.epilog + +sw.epilog: ; preds = %entry, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb + ret void +} +; Test the indirect jump sequence derived from a "switch" statement. +; CHECK: TestIndirectJump: +; CHECK: andl $-32, %r11d +; CHECK-NEXT: addq %r15, %r11 +; CHECK-NEXT: jmpq *%r11 +; At least 4 "jmp"s due to 5 switch cases +; CHECK: jmp +; CHECK: jmp +; CHECK: jmp +; CHECK: jmp +; At least 1 direct call to puts() +; CHECK: pushq $0 +; CHECK-NEXT: .text +; CHECK: jmpq 0 +; CHECK-NEXT: puts + +declare i32 @puts(i8*) #1 + +; Function Attrs: nounwind +define void @TestReturn() #0 { +entry: + ret void +} +; Return sequence is just the indirect jump sequence +; CHECK: TestReturn: +; CHECK: andl $-32, %r11d +; CHECK-NEXT: addq %r15, %r11 +; CHECK-NEXT: jmpq *%r11 + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + +; Special test that no "call" or "ret" instructions are generated. +; NOCALLRET-NOT: call +; NOCALLRET-NOT: ret |