aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Rafael Espindola <rafael.espindola@gmail.com> 2012-01-11 19:00:37 +0000
committer Rafael Espindola <rafael.espindola@gmail.com> 2012-01-11 19:00:37 +0000
commit 2028b793e1fd1a8dd4d99b0b7c9972865d5e806a (patch)
tree 7ef8a689686717facb1837bc11b510c2ee1f9f79
parent 7692ce9e810ed1707da46faf20c84f1ffd54bc55 (diff)
Support segmented stacks on mac.
This uses TLS slot 90, which actually belongs to JavaScriptCore. We only support frames with static size. Patch by Brian Anderson. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147960 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- lib/Target/X86/X86FrameLowering.cpp 83
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp 3
-rw-r--r-- test/CodeGen/X86/segmented-stacks.ll 279
3 files changed, 273 insertions, 92 deletions
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 4386762c85..4cda76c0a4 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1298,10 +1298,15 @@ HasNestArgument(const MachineFunction *MF) {
return false;
}
+
+/// GetScratchRegister - Get a register for performing work in the segmented
+/// stack prologue. Depending on platform and the properties of the function
+/// either one or two registers will be needed. Set Primary to true for
+/// the first register, false for the second.
static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
+GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
if (Is64Bit) {
- return X86::R11;
+ return Primary ? X86::R11 : X86::R12;
} else {
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
bool IsNested = HasNestArgument(&MF);
@@ -1313,13 +1318,13 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF) {
"nested function.");
return -1;
} else {
- return X86::EAX;
+ return Primary ? X86::EAX : X86::ECX;
}
} else {
if (IsNested)
- return X86::EDX;
+ return Primary ? X86::EDX : X86::EAX;
else
- return X86::ECX;
+ return Primary ? X86::ECX : X86::EAX;
}
}
}
@@ -1339,14 +1344,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
DebugLoc DL;
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
- unsigned ScratchReg = GetScratchRegister(Is64Bit, MF);
+ unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
"Scratch register is live-in");
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
- if (!ST->isTargetLinux())
- report_fatal_error("Segmented stacks supported only on linux.");
+ if (!ST->isTargetLinux() && !ST->isTargetDarwin())
+ report_fatal_error("Segmented stacks supported only on linux and darwin.");
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
@@ -1377,12 +1382,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
// prologue.
StackSize = MFI->getStackSize();
+ // When the frame size is less than 256 we just compare the stack
+ // boundary directly to the value of the stack pointer, per gcc.
+ bool CompareStackPointer = StackSize < kSplitStackAvailable;
+
// Read the limit off the current stacklet off the stack_guard location.
if (Is64Bit) {
- TlsReg = X86::FS;
- TlsOffset = 0x70;
+ if (ST->isTargetLinux()) {
+ TlsReg = X86::FS;
+ TlsOffset = 0x70;
+ } else if (ST->isTargetDarwin()) {
+ TlsReg = X86::GS;
+ TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
+ }
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::RSP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
@@ -1392,16 +1406,55 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else {
TlsReg = X86::GS;
- TlsOffset = 0x30;
- if (StackSize < kSplitStackAvailable)
+ if (CompareStackPointer)
ScratchReg = X86::ESP;
else
BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
.addImm(1).addReg(0).addImm(-StackSize).addReg(0);
- BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
- .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ if (ST->isTargetLinux()) {
+ TlsOffset = 0x30;
+
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
+ .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
+ } else if (ST->isTargetDarwin()) {
+ TlsOffset = 0x48 + 90*4;
+
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ unsigned ScratchReg2;
+ bool SaveScratch2;
+ if (CompareStackPointer) {
+ // The primary scratch register is available for holding the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+ SaveScratch2 = false;
+ } else {
+ // Need to use a second register to hold the TLS offset
+ ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+
+ // Unfortunately, with fastcc the second scratch register may hold an arg
+ SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
+ }
+
+ // If Scratch2 is live-in then it needs to be saved
+ assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
+ "Scratch register is live-in and not saved");
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
+ .addReg(ScratchReg2, RegState::Kill);
+
+ BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
+ .addImm(TlsOffset);
+ BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
+ .addReg(ScratchReg)
+ .addReg(ScratchReg2).addImm(1).addReg(0)
+ .addImm(0)
+ .addReg(TlsReg);
+
+ if (SaveScratch2)
+ BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
+ }
}
// This jump is taken if SP >= (Stacklet Limit + Stack Space required).
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index d73a3dd7f3..b8002d57eb 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -102,9 +102,6 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
// default to hard float ABI
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType = FloatABI::Hard;
-
- if (Options.EnableSegmentedStacks && !Subtarget.isTargetELF())
- report_fatal_error("Segmented stacks are only implemented on ELF.");
}
//===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/segmented-stacks.ll b/test/CodeGen/X86/segmented-stacks.ll
index 3ba18cffcc..6e91d00ac6 100644
--- a/test/CodeGen/X86/segmented-stacks.ll
+++ b/test/CodeGen/X86/segmented-stacks.ll
@@ -1,9 +1,13 @@
-; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
+; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
+; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
; We used to crash with filetype=obj
; RUN: llc < %s -mtriple=i686-linux -segmented-stacks -filetype=obj
; RUN: llc < %s -mtriple=x86_64-linux -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=i686-darwin -segmented-stacks -filetype=obj
+; RUN: llc < %s -mtriple=x86_64-darwin -segmented-stacks -filetype=obj
; Just to prevent the alloca from being optimized away
declare void @dummy_use(i32*, i32)
@@ -13,25 +17,46 @@ define void @test_basic() {
call void @dummy_use (i32* %mem, i32 10)
ret void
-; X32: test_basic:
+; X32-Linux: test_basic:
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB0_2
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB0_2
-; X32: pushl $0
-; X32-NEXT: pushl $60
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
-; X64: test_basic:
+; X64-Linux: test_basic:
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB0_2
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB0_2
-; X64: movabsq $40, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_basic:
+
+; X32-Darwin: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %esp
+; X32-Darwin-NEXT: ja LBB0_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_basic:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB0_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
@@ -40,23 +65,42 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) {
%result = add i32 %other, %addend
ret i32 %result
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB1_2
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB1_2
+
+; X32-Linux: pushl $4
+; X32-Linux-NEXT: pushl $0
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB1_2
+
+; X64-Linux: movq %r10, %rax
+; X64-Linux-NEXT: movabsq $0, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+; X64-Linux-NEXT: movq %rax, %r10
-; X32: pushl $4
-; X32-NEXT: pushl $0
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: movl $432, %edx
+; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
+; X32-Darwin-NEXT: ja LBB1_2
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB1_2
+; X32-Darwin: pushl $4
+; X32-Darwin-NEXT: pushl $0
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movq %r10, %rax
-; X64-NEXT: movabsq $0, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
-; X64-NEXT: movq %rax, %r10
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB1_2
+
+; X64-Darwin: movq %r10, %rax
+; X64-Darwin-NEXT: movabsq $0, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+; X64-Darwin-NEXT: movq %rax, %r10
}
@@ -65,23 +109,42 @@ define void @test_large() {
call void @dummy_use (i32* %mem, i32 0)
ret void
-; X32: leal -40012(%esp), %ecx
-; X32-NEXT: cmpl %gs:48, %ecx
-; X32-NEXT: ja .LBB2_2
+; X32-Linux: leal -40012(%esp), %ecx
+; X32-Linux-NEXT: cmpl %gs:48, %ecx
+; X32-Linux-NEXT: ja .LBB2_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB2_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: leal -40012(%esp), %ecx
+; X32-Darwin-NEXT: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
+; X32-Darwin-NEXT: ja LBB2_2
-; X32: pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
-; X64-NEXT: ja .LBB2_2
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB2_2
-; X64: movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
@@ -90,25 +153,46 @@ define fastcc void @test_fastcc() {
call void @dummy_use (i32* %mem, i32 10)
ret void
-; X32: test_fastcc:
+; X32-Linux: test_fastcc:
+
+; X32-Linux: cmpl %gs:48, %esp
+; X32-Linux-NEXT: ja .LBB3_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $60
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc:
+
+; X64-Linux: cmpq %fs:112, %rsp
+; X64-Linux-NEXT: ja .LBB3_2
-; X32: cmpl %gs:48, %esp
-; X32-NEXT: ja .LBB3_2
+; X64-Linux: movabsq $40, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
-; X32: pushl $0
-; X32-NEXT: pushl $60
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc:
-; X64: test_fastcc:
+; X32-Darwin: movl $432, %eax
+; X32-Darwin-NEXT: cmpl %gs:(%eax), %esp
+; X32-Darwin-NEXT: ja LBB3_2
-; X64: cmpq %fs:112, %rsp
-; X64-NEXT: ja .LBB3_2
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $60
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movabsq $40, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
+; X64-Darwin: test_fastcc:
+
+; X64-Darwin: cmpq %gs:816, %rsp
+; X64-Darwin-NEXT: ja LBB3_2
+
+; X64-Darwin: movabsq $40, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
}
@@ -117,25 +201,72 @@ define fastcc void @test_fastcc_large() {
call void @dummy_use (i32* %mem, i32 0)
ret void
-; X32: test_fastcc_large:
+; X32-Linux: test_fastcc_large:
+
+; X32-Linux: leal -40012(%esp), %eax
+; X32-Linux-NEXT: cmpl %gs:48, %eax
+; X32-Linux-NEXT: ja .LBB4_2
+
+; X32-Linux: pushl $0
+; X32-Linux-NEXT: pushl $40012
+; X32-Linux-NEXT: calll __morestack
+; X32-Linux-NEXT: ret
+
+; X64-Linux: test_fastcc_large:
+
+; X64-Linux: leaq -40008(%rsp), %r11
+; X64-Linux-NEXT: cmpq %fs:112, %r11
+; X64-Linux-NEXT: ja .LBB4_2
+
+; X64-Linux: movabsq $40008, %r10
+; X64-Linux-NEXT: movabsq $0, %r11
+; X64-Linux-NEXT: callq __morestack
+; X64-Linux-NEXT: ret
+
+; X32-Darwin: test_fastcc_large:
+
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: ja LBB4_2
+
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
+
+; X64-Darwin: test_fastcc_large:
+
+; X64-Darwin: leaq -40008(%rsp), %r11
+; X64-Darwin-NEXT: cmpq %gs:816, %r11
+; X64-Darwin-NEXT: ja LBB4_2
+
+; X64-Darwin: movabsq $40008, %r10
+; X64-Darwin-NEXT: movabsq $0, %r11
+; X64-Darwin-NEXT: callq ___morestack
+; X64-Darwin-NEXT: ret
+
+}
+
+define fastcc void @test_fastcc_large_with_ecx_arg(i32 %a) {
+ %mem = alloca i32, i32 10000
+ call void @dummy_use (i32* %mem, i32 %a)
+ ret void
-; X32: leal -40012(%esp), %eax
-; X32-NEXT: cmpl %gs:48, %eax
-; X32-NEXT: ja .LBB4_2
+; This is testing that the Mac implementation preserves ecx
-; X32: pushl $0
-; X32-NEXT: pushl $40012
-; X32-NEXT: calll __morestack
-; X32-NEXT: ret
+; X32-Darwin: test_fastcc_large_with_ecx_arg:
-; X64: test_fastcc_large:
+; X32-Darwin: leal -40012(%esp), %eax
+; X32-Darwin-NEXT: pushl %ecx
+; X32-Darwin-NEXT: movl $432, %ecx
+; X32-Darwin-NEXT: cmpl %gs:(%ecx), %eax
+; X32-Darwin-NEXT: popl %ecx
+; X32-Darwin-NEXT: ja LBB5_2
-; X64: leaq -40008(%rsp), %r11
-; X64-NEXT: cmpq %fs:112, %r11
-; X64-NEXT: ja .LBB4_2
+; X32-Darwin: pushl $0
+; X32-Darwin-NEXT: pushl $40012
+; X32-Darwin-NEXT: calll ___morestack
+; X32-Darwin-NEXT: ret
-; X64: movabsq $40008, %r10
-; X64-NEXT: movabsq $0, %r11
-; X64-NEXT: callq __morestack
-; X64-NEXT: ret
}