aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/X86/X86FrameLowering.cpp20
-rw-r--r--lib/Target/X86/X86InstrInfo.cpp2
-rw-r--r--test/CodeGen/X86/clobber-fi0.ll37
3 files changed, 58 insertions, 1 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 1d5457297d..cf6c5c86d3 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -625,6 +625,22 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const {
return CompactUnwindEncoding;
}
+/// colobbersTheStack - This function checks if any of the users of EFLAGS
+/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has
+/// to use the stack, and if we don't adjust the stack we clobber the first
+/// frame index.
+/// See X86InstrInfo::copyPhysReg.
+static bool colobbersTheStack(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (MachineRegisterInfo::reg_iterator ri = MRI.reg_begin(X86::EFLAGS),
+ re = MRI.reg_end(); ri != re; ++ri)
+ if (ri->isCopy())
+ return true;
+
+ return false;
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -673,12 +689,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
// function, and use up to 128 bytes of stack space, don't have a frame
// pointer, calls, or dynamic alloca then we do not need to adjust the
- // stack pointer (we fit in the Red Zone).
+ // stack pointer (we fit in the Red Zone). We also check that we don't
+ // push and pop from the stack.
if (Is64Bit && !Fn->getFnAttributes().hasAttribute(Attribute::NoRedZone) &&
!RegInfo->needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
+ !colobbersTheStack(MF) && // Don't push and pop.
!MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 7bbab3862d..fbfbc29a71 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2892,6 +2892,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
// Moving EFLAGS to / from another register requires a push and a pop.
+ // Notice that we have to adjust the stack if we don't want to clobber the
+ // first frame index. See X86FrameLowering.cpp - colobbersTheStack.
if (SrcReg == X86::EFLAGS) {
if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
diff --git a/test/CodeGen/X86/clobber-fi0.ll b/test/CodeGen/X86/clobber-fi0.ll
new file mode 100644
index 0000000000..38a42dbf1a
--- /dev/null
+++ b/test/CodeGen/X86/clobber-fi0.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.0"
+
+; In the code below we need to copy the EFLAGS because of scheduling constraints.
+; When copying the EFLAGS we need to write to the stack with push/pop. This forces
+; us to emit the prolog.
+
+; CHECK: main
+; CHECK: subq{{.*}}rsp
+; CHECK: ret
+define i32 @main(i32 %arg, i8** %arg1) nounwind {
+bb:
+ %tmp = alloca i32, align 4 ; [#uses=3 type=i32*]
+ %tmp2 = alloca i32, align 4 ; [#uses=3 type=i32*]
+ %tmp3 = alloca i32 ; [#uses=1 type=i32*]
+ store i32 1, i32* %tmp, align 4
+ store i32 1, i32* %tmp2, align 4
+ br label %bb4
+
+bb4: ; preds = %bb4, %bb
+ %tmp6 = load i32* %tmp2, align 4 ; [#uses=1 type=i32]
+ %tmp7 = add i32 %tmp6, -1 ; [#uses=2 type=i32]
+ store i32 %tmp7, i32* %tmp2, align 4
+ %tmp8 = icmp eq i32 %tmp7, 0 ; [#uses=1 type=i1]
+ %tmp9 = load i32* %tmp ; [#uses=1 type=i32]
+ %tmp10 = add i32 %tmp9, -1 ; [#uses=1 type=i32]
+ store i32 %tmp10, i32* %tmp3
+ br i1 %tmp8, label %bb11, label %bb4
+
+bb11: ; preds = %bb4
+ %tmp12 = load i32* %tmp, align 4 ; [#uses=1 type=i32]
+ ret i32 %tmp12
+}
+
+