diff options
-rw-r--r-- | lib/Target/ARM/ARMCallingConv.h | 6 | ||||
-rw-r--r-- | lib/Target/ARM/ARMCallingConv.td | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 1 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll | 95 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll | 61 | ||||
-rw-r--r-- | test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll | 28 |
6 files changed, 192 insertions, 2 deletions
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index e6e8c3d5fa..4f94ad2403 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -74,9 +74,15 @@ static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 }; static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 }; static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 }; + static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); if (Reg == 0) { + + // If only R3 was left unallocated, we still must waste it. + Reg = State.AllocateReg(GPRArgRegs, 4); + assert((!Reg || Reg == ARM::R3) && "Wrong GPRs usage for f64"); + // For the 2nd half of a v2f64, do not just fail. if (CanFail) return false; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 9966f6c3f6..8ff666ed28 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -111,8 +111,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" - "ArgFlags.getOrigAlign() != 8", + CCIfType<[i32], CCIf<"ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>, diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 23d7ef1290..a9fe221e6d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1738,6 +1738,7 @@ ARMTargetLowering::HandleByVal( State->getCallOrPrologue() == Call) && "unhandled ParmContext"); if ((!State->isFirstByValRegValid()) && + (!Subtarget->isAAPCS_ABI() || 
State->getNextStackOffset() == 0) && (ARM::R0 <= reg) && (reg <= ARM::R3)) { if (Subtarget->isAAPCS_ABI() && Align > 4) { unsigned AlignInRegs = Align / 4; diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll new file mode 100644 index 0000000000..38d515f922 --- /dev/null +++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C4-vs-VFP.ll @@ -0,0 +1,95 @@ +;Check 5.5 Parameter Passing --> Stage C --> C.4 statement, when NSAA is not +;equal to SP. +; +; Our purpose: make NSAA != SP, and only after start to use GPRs. +; +;Co-Processor register candidates may be either in VFP or in stack, so after +;all VFP are allocated, stack is used. We can use stack without GPR allocation +;in that case, passing 9 f64 params, for example. +;First eight params goes to d0-d7, ninth one goes to the stack. +;Now, as 10th parameter, we pass i32, and it must go to R0. +; +;5.5 Parameter Passing, Stage C: +; +;C.2.cp If the argument is a CPRC then any co-processor registers in that class +;that are unallocated are marked as unavailable. The NSAA is adjusted upwards +;until it is correctly aligned for the argument and the argument is copied to +;the memory at the adjusted NSAA. The NSAA is further incremented by the size +;of the argument. The argument has now been allocated. +;... +;C.4 If the size in words of the argument is not more than r4 minus NCRN, the +;argument is copied into core registers, starting at the NCRN. The NCRN is +;incremented by the number of registers used. Successive registers hold the +;parts of the argument they would hold if its value were loaded into those +;registers from memory using an LDM instruction. The argument has now been +;allocated. +; +;What is actually checked here: +;Here we check that i32 param goes to r0. 
+; +;Current test-case was produced with command: +;arm-linux-gnueabihf-clang -mcpu=cortex-a9 params-to-GPR.c -S -O1 -emit-llvm +; +;// params-to-GPR.c: +; +;void fooUseI32(unsigned); +; +;void foo(long double p0, +; long double p1, +; long double p2, +; long double p3, +; long double p4, +; long double p5, +; long double p6, +; long double p7, +; long double p8, +; unsigned p9) { +; fooUseI32(p9); +;} +; +;void doFoo() { +; foo( 1,2,3,4,5,6,7,8,9, 43 ); +;} + +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s +; +;CHECK: foo: +;CHECK-NOT: mov r0 +;CHECK-NOT: ldr r0 +;CHECK: bl fooUseI32 +;CHECK: doFoo: +;CHECK: movs r0, #43 +;CHECK: bl foo + +define void @foo(double %p0, ; --> D0 + double %p1, ; --> D1 + double %p2, ; --> D2 + double %p3, ; --> D3 + double %p4, ; --> D4 + double %p5, ; --> D5 + double %p6, ; --> D6 + double %p7, ; --> D7 + double %p8, ; --> Stack + i32 %p9) #0 { ; --> R0, not Stack+8 +entry: + tail call void @fooUseI32(i32 %p9) + ret void +} + +declare void @fooUseI32(i32) + +define void @doFoo() { +entry: + tail call void @foo(double 23.0, ; --> D0 + double 23.1, ; --> D1 + double 23.2, ; --> D2 + double 23.3, ; --> D3 + double 23.4, ; --> D4 + double 23.5, ; --> D5 + double 23.6, ; --> D6 + double 23.7, ; --> D7 + double 23.8, ; --> Stack + i32 43) ; --> R0, not Stack+8 + ret void +} + diff --git a/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll new file mode 100644 index 0000000000..446403d79c --- /dev/null +++ b/test/CodeGen/ARM/2013-04-16-AAPCS-C5-vs-VFP.ll @@ -0,0 +1,61 @@ +;Check 5.5 Parameter Passing --> Stage C --> C.5 statement, when NSAA is not +;equal to SP. +; +; Our purpose: make NSAA != SP, and only after start to use GPRs, then pass +; byval parameter and check that it goes to stack only. +; +;Co-Processor register candidates may be either in VFP or in stack, so after +;all VFP are allocated, stack is used. 
We can use stack without GPR allocation +;in that case, passing 9 f64 params, for example. +;First eight params goes to d0-d7, ninth one goes to the stack. +;Now, as 10th parameter, we pass i32, and it must go to R0. +; +;For more information, +;please, read 5.5 Parameter Passing, Stage C, stages C.2.cp, C.4 and C.5 +; +; +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s + +%struct_t = type { i32, i32, i32, i32 } +@static_val = constant %struct_t { i32 777, i32 888, i32 999, i32 1000 } +declare void @fooUseStruct(%struct_t*) + +define void @foo2(double %p0, ; --> D0 + double %p1, ; --> D1 + double %p2, ; --> D2 + double %p3, ; --> D3 + double %p4, ; --> D4 + double %p5, ; --> D5 + double %p6, ; --> D6 + double %p7, ; --> D7 + double %p8, ; --> Stack + i32 %p9, ; --> R0 + %struct_t* byval %p10) ; --> Stack+8 +{ +entry: +;CHECK: push.w {r11, lr} +;CHECK-NOT: stm +;CHECK: add r0, sp, #16 +;CHECK: bl fooUseStruct + call void @fooUseStruct(%struct_t* %p10) + + ret void +} + +define void @doFoo2() { +entry: +;CHECK-NOT: ldm + tail call void @foo2(double 23.0, ; --> D0 + double 23.1, ; --> D1 + double 23.2, ; --> D2 + double 23.3, ; --> D3 + double 23.4, ; --> D4 + double 23.5, ; --> D5 + double 23.6, ; --> D6 + double 23.7, ; --> D7 + double 23.8, ; --> Stack + i32 43, ; --> R0, not Stack+8 + %struct_t* byval @static_val) ; --> Stack+8, not R1 + ret void +} + diff --git a/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll new file mode 100644 index 0000000000..de5fd31e2f --- /dev/null +++ b/test/CodeGen/ARM/2013-04-21-AAPCS-VA-C.1.cp.ll @@ -0,0 +1,28 @@ +;Check 5.5 Parameter Passing --> Stage C --> C.1.cp statement for VA functions. +;Note: There are no VFP CPRCs in a variadic procedure. +;Check that after %C was sent to stack, we set Next Core Register Number to R4. 
+ +;This test is a simplified IR version of +;test-suite/SingleSource/UnitTests/2002-05-02-ManyArguments.c + +;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s + +@.str = private unnamed_addr constant [13 x i8] c"%d %d %f %i\0A\00", align 1 + +;CHECK: printfn: +define void @printfn(i32 %a, i16 signext %b, double %C, i8 signext %E) { +entry: + %conv = sext i16 %b to i32 + %conv1 = sext i8 %E to i32 + %call = tail call i32 (i8*, ...)* @printf( + i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), ; --> R0 + i32 %a, ; --> R1 + i32 %conv, ; --> R2 + double %C, ; --> SP, NCRN := R4 +;CHECK: str r2, [sp, #8] + i32 %conv1) ; --> SP+8 + ret void +} + +declare i32 @printf(i8* nocapture, ...) + |