diff options
-rw-r--r-- | lib/Target/X86/X86FrameLowering.cpp | 20 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 2 | ||||
-rw-r--r-- | test/CodeGen/X86/clobber-fi0.ll | 37 |
3 files changed, 58 insertions, 1 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 1d5457297d..cf6c5c86d3 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { return CompactUnwindEncoding; } +/// colobbersTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool colobbersTheStack(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS), + re = MRI.reg_end(); ri != re; ++ri) + if (ri->isCopy()) + return true; + + return false; +} + /// emitPrologue - Push callee-saved registers onto the stack, which /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to @@ -673,12 +689,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // If this is x86-64 and the Red Zone is not disabled, if we are a leaf // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the - // stack pointer (we fit in the Red Zone). + // stack pointer (we fit in the Red Zone). We also check that we don't + // push and pop from the stack. if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) && !RegInfo->needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. !MFI->adjustsStack() && // No calls. !IsWin64 && // Win64 has no Red Zone + !colobbersTheStack(MF) && // Don't push and pop. !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (HasFP) MinSize += SlotSize; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7bbab3862d..fbfbc29a71 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2892,6 +2892,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - colobbersTheStack. if (SrcReg == X86::EFLAGS) { if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll new file mode 100644 index 0000000000..38a42dbf1a --- /dev/null +++ b/test/CodeGen/X86/clobber-fi0.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.0" + +; In the code below we need to copy the EFLAGS because of scheduling constraints. +; When copying the EFLAGS we need to write to the stack with push/pop. This forces +; us to emit the prolog. + +; CHECK: main +; CHECK: subq{{.*}}rsp +; CHECK: ret +define i32 @main(i32 %arg, i8** %arg1) nounwind { +bb: + %tmp = alloca i32, align 4 ; [#uses=3 type=i32*] + %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*] + %tmp3 = alloca i32 ; [#uses=1 type=i32*] + store i32 1, i32* %tmp, align 4 + store i32 1, i32* %tmp2, align 4 + br label %bb4 + +bb4: ; preds = %bb4, %bb + %tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32] + %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32] + store i32 %tmp7, i32* %tmp2, align 4 + %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1] + %tmp9 = load i32* %tmp ; [#uses=1 type=i32] + %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32] + store i32 %tmp10, i32* %tmp3 + br i1 %tmp8, label %bb11, label %bb4 + +bb11: ; preds = %bb4 + %tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32] + ret i32 %tmp12 +} + + |