aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/Target/ARM/ARMCallingConv.h6
-rw-r--r--lib/Target/ARM/ARMCallingConv.td3
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp1
-rw-r--r--test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll95
-rw-r--r--test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll61
-rw-r--r--test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll28
6 files changed, 192 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index e6e8c3d5fa..4f94ad2403 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
+
+ // If only R3 was left unallocated, we must still waste it.
+ Reg = State.AllocateReg(GPRArgRegs, 4);
+ assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64");
+
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
return false;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 9966f6c3f6..8ff666ed28 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
- CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
- "ArgFlags.getOrigAlign() != 8",
+ CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 23d7ef1290..a9fe221e6d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1738,6 +1738,7 @@ ARMTargetLowering::HandleByVal(
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
if ((!State->isFirstByValRegValid()) &&
+ (!Subtarget->isAAPCS_ABI() || State->getNextStackOffset() == 0) &&
(ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
new file mode 100644
index 0000000000..38d515f922
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll
@@ -0,0 +1,95 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.4 statement, when NSAA is not
+;equal to SP.
+;
+; Our purpose: make NSAA != SP, and only afterwards start using GPRs.
+;
+;Co-Processor register candidates may be either in VFP or in stack, so after
+;all VFP are allocated, stack is used. We can use stack without GPR allocation
+;in that case, passing 9 f64 params, for example.
+;The first eight params go to d0-d7; the ninth one goes to the stack.
+;Now, as 10th parameter, we pass i32, and it must go to R0.
+;
+;5.5 Parameter Passing, Stage C:
+;
+;C.2.cp If the argument is a CPRC then any co-processor registers in that class
+;that are unallocated are marked as unavailable. The NSAA is adjusted upwards
+;until it is correctly aligned for the argument and the argument is copied to
+;the memory at the adjusted NSAA. The NSAA is further incremented by the size
+;of the argument. The argument has now been allocated.
+;...
+;C.4 If the size in words of the argument is not more than r4 minus NCRN, the
+;argument is copied into core registers, starting at the NCRN. The NCRN is
+;incremented by the number of registers used. Successive registers hold the
+;parts of the argument they would hold if its value were loaded into those
+;registers from memory using an LDM instruction. The argument has now been
+;allocated.
+;
+;What is actually checked here:
+;Here we check that i32 param goes to r0.
+;
+;Current test-case was produced with command:
+;arm-linux-gnueabihf-clang -mcpu=cortex-a9 params-to-GPR.c -S -O1 -emit-llvm
+;
+;// params-to-GPR.c:
+;
+;void fooUseI32(unsigned);
+;
+;void foo(long double p0,
+; long double p1,
+; long double p2,
+; long double p3,
+; long double p4,
+; long double p5,
+; long double p6,
+; long double p7,
+; long double p8,
+; unsigned p9) {
+; fooUseI32(p9);
+;}
+;
+;void doFoo() {
+; foo( 1,2,3,4,5,6,7,8,9, 43 );
+;}
+
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+;
+;CHECK: foo:
+;CHECK-NOT: mov r0
+;CHECK-NOT: ldr r0
+;CHECK: bl fooUseI32
+;CHECK: doFoo:
+;CHECK: movs r0, #43
+;CHECK: bl foo
+
+define void @foo(double %p0, ; --> D0
+ double %p1, ; --> D1
+ double %p2, ; --> D2
+ double %p3, ; --> D3
+ double %p4, ; --> D4
+ double %p5, ; --> D5
+ double %p6, ; --> D6
+ double %p7, ; --> D7
+ double %p8, ; --> Stack
+ i32 %p9) #0 { ; --> R0, not Stack+8
+entry:
+ tail call void @fooUseI32(i32 %p9)
+ ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() {
+entry:
+ tail call void @foo(double 23.0, ; --> D0
+ double 23.1, ; --> D1
+ double 23.2, ; --> D2
+ double 23.3, ; --> D3
+ double 23.4, ; --> D4
+ double 23.5, ; --> D5
+ double 23.6, ; --> D6
+ double 23.7, ; --> D7
+ double 23.8, ; --> Stack
+ i32 43) ; --> R0, not Stack+8
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
new file mode 100644
index 0000000000..446403d79c
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll
@@ -0,0 +1,61 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.5 statement, when NSAA is not
+;equal to SP.
+;
+; Our purpose: make NSAA != SP, only afterwards start using GPRs, then pass
+; a byval parameter and check that it goes to the stack only.
+;
+;Co-Processor register candidates may be either in VFP or in stack, so after
+;all VFP are allocated, stack is used. We can use stack without GPR allocation
+;in that case, passing 9 f64 params, for example.
+;The first eight params go to d0-d7; the ninth one goes to the stack.
+;Now, as 10th parameter, we pass i32, and it must go to R0.
+;
+;For more information,
+;please, read 5.5 Parameter Passing, Stage C, stages C.2.cp, C.4 and C.5
+;
+;
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%struct_t = type { i32, i32, i32, i32 }
+@static_val = constant %struct_t { i32 777, i32 888, i32 999, i32 1000 }
+declare void @fooUseStruct(%struct_t*)
+
+define void @foo2(double %p0, ; --> D0
+ double %p1, ; --> D1
+ double %p2, ; --> D2
+ double %p3, ; --> D3
+ double %p4, ; --> D4
+ double %p5, ; --> D5
+ double %p6, ; --> D6
+ double %p7, ; --> D7
+ double %p8, ; --> Stack
+ i32 %p9, ; --> R0
+ %struct_t* byval %p10) ; --> Stack+8
+{
+entry:
+;CHECK: push.w {r11, lr}
+;CHECK-NOT: stm
+;CHECK: add r0, sp, #16
+;CHECK: bl fooUseStruct
+ call void @fooUseStruct(%struct_t* %p10)
+
+ ret void
+}
+
+define void @doFoo2() {
+entry:
+;CHECK-NOT: ldm
+ tail call void @foo2(double 23.0, ; --> D0
+ double 23.1, ; --> D1
+ double 23.2, ; --> D2
+ double 23.3, ; --> D3
+ double 23.4, ; --> D4
+ double 23.5, ; --> D5
+ double 23.6, ; --> D6
+ double 23.7, ; --> D7
+ double 23.8, ; --> Stack
+ i32 43, ; --> R0, not Stack+8
+ %struct_t* byval @static_val) ; --> Stack+8, not R1
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
new file mode 100644
index 0000000000..de5fd31e2f
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll
@@ -0,0 +1,28 @@
+;Check 5.5 Parameter Passing --> Stage C --> C.1.cp statement for VA functions.
+;Note: There are no VFP CPRCs in a variadic procedure.
+;Check that after %C was sent to stack, we set Next Core Register Number to R4.
+
+;This test is a simplified IR version of
+;test-suite/SingleSource/UnitTests/2002-05-02-ManyArguments.c
+
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+@.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1
+
+;CHECK: printfn:
+define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) {
+entry:
+ %conv = sext i16 %b to i32
+ %conv1 = sext i8 %E to i32
+ %call = tail call i32 (i8*, ...)* @printf(
+ i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), ; --> R0
+ i32 %a, ; --> R1
+ i32 %conv, ; --> R2
+ double %C, ; --> SP, NCRN := R4
+;CHECK: str r2, [sp, #8]
+ i32 %conv1) ; --> SP+8
+ ret void
+}
+
+declare i32 @printf(i8* nocapture, ...)
+