author    Stepan Dyatkovskiy <stpworld@narod.ru>  2013-05-05 07:48:36 +0000
committer Stepan Dyatkovskiy <stpworld@narod.ru>  2013-05-05 07:48:36 +0000
commit    46abfcf4187432da728cbe452c32143da077e07f (patch)
tree      1166ab9d35b6cf746dd4198825afe6afa52b8716
parent    46e7e6f39e8a4efec0cc45ca346b40683288ff9e (diff)
For ARM backend, fixed "byval" attribute support.
Now even small structures can be passed byval (small enough to be stored in GPRs).

The regression tests check the following function prototypes (PR15293):

  %artz = type { i32 }

  define void @foo(%artz* byval %s)
  define void @foo2(%artz* byval %s, i32 %p, %artz* byval %s2)

foo:  "s" is stored in R0.
foo2: "s" is stored in R0, "s2" is stored in R2.

The following AAPCS rules (5.5 Parameters Passing, C.4 and C.5) are checked, where "ParamSize" is the parameter size in 32-bit words:

-- NSAA != 0, NCRN < R4 and NCRN+ParamSize > R4: the parameter is sent to the stack; NCRN := R4.
-- NSAA != 0, NCRN < R4 and NCRN+ParamSize < R4: the parameter is stored in GPRs; NCRN += ParamSize.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181148 91177308-0d34-0410-b5e6-96231b3b80d8
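Not part of the commit: a rough standalone model of the two rules above as stated. Plain integers stand in for the core registers (NCRN counts registers used, 0..4, where 4 means r0-r3 are exhausted), NSAA is the next stacked argument offset in bytes, and ParamSize is the parameter size in 32-bit words; other AAPCS cases (for example splitting a parameter between registers and the stack while NSAA is still at SP) are omitted.

  struct AllocState { unsigned NCRN = 0; unsigned NSAA = 0; };

  // Returns true if the byval parameter lands in GPRs, false if it is sent to
  // the stack, updating the state per the two cases listed above.
  bool allocateByValInRegs(AllocState &S, unsigned ParamSize) {
    if (S.NSAA != 0 && S.NCRN < 4 && S.NCRN + ParamSize > 4) {
      S.NCRN = 4;                 // waste the remaining core registers
      S.NSAA += 4 * ParamSize;    // parameter goes to the stack
      return false;
    }
    if (S.NSAA != 0 && S.NCRN < 4 && S.NCRN + ParamSize < 4) {
      S.NCRN += ParamSize;        // parameter is stored in GPRs
      return true;
    }
    return false;                 // remaining cases not modelled here
  }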
-rw-r--r--include/llvm/CodeGen/CallingConvLower.h97
-rw-r--r--lib/CodeGen/CallingConvLower.cpp2
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp135
-rw-r--r--lib/Target/ARM/ARMISelLowering.h2
-rw-r--r--test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll73
-rw-r--r--test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll48
-rw-r--r--test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll45
7 files changed, 360 insertions, 42 deletions
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
index c035e0777c..fa9d60f0d4 100644
--- a/include/llvm/CodeGen/CallingConvLower.h
+++ b/include/llvm/CodeGen/CallingConvLower.h
@@ -163,8 +163,56 @@ private:
unsigned StackOffset;
SmallVector<uint32_t, 16> UsedRegs;
- unsigned FirstByValReg;
- bool FirstByValRegValid;
+
+ // ByValInfo and SmallVector<ByValInfo, 4> ByValRegs:
+ //
+ // The vector of ByValInfo instances (ByValRegs) is introduced for byval
+ // register tracking.
+ // In other words, it tracks byval parameters that are stored in
+ // general purpose registers.
+ //
+ // For 4-byte stack alignment,
+ // the instance index corresponds to the byval parameter number in the
+ // formal arguments set. Assume we have a "struct_type" of size 4 bytes;
+ // then, for the function "foo":
+ //
+ // i32 foo(i32 %p, %struct_type* %r, i32 %s, %struct_type* %t)
+ //
+ // ByValRegs[0] describes how "%r" is stored (Begin == r1, End == r2)
+ // ByValRegs[1] describes how "%t" is stored (Begin == r3, End == r4).
+ //
+ // In case of 8-byte stack alignment,
+ // ByValRegs may also contain information about wasted registers.
+ // In the function shown above, r3 would be wasted according to the AAPCS rules,
+ // and in that case ByValRegs[1].Waste would be "true".
+ // The ByValRegs vector size would still be 2,
+ // while "%t" would go to the stack: it would not be described in ByValRegs.
+ //
+ // Intended use-case for this collection:
+ // 1. Initially ByValRegs is empty, InRegsParamsProceed is 0.
+ // 2. HandleByVal fills up ByValRegs.
+ // 3. Argument analysis (LowerFormalArguments, for example). After
+ // a byval argument has been analyzed, InRegsParamsProceed is incremented.
+ struct ByValInfo {
+ ByValInfo(unsigned B, unsigned E, bool IsWaste = false) :
+ Begin(B), End(E), Waste(IsWaste) {}
+ // First register allocated for current parameter.
+ unsigned Begin;
+
+ // One past the last register allocated for the current parameter.
+ unsigned End;
+
+ // Indicates that the current range of registers doesn't belong to any
+ // parameter; it was wasted due to stack alignment rules.
+ // For more information see:
+ // AAPCS, 5.5 Parameter Passing, Stage C, C.3.
+ bool Waste;
+ };
+ SmallVector<ByValInfo, 4> ByValRegs;
+
+ // InRegsParamsProceed - the number of ByValRegs instances processed so far
+ // during argument analysis.
+ unsigned InRegsParamsProceed;
protected:
ParmContext CallOrPrologue;
@@ -306,12 +354,45 @@ public:
MVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
- // First GPR that carries part of a byval aggregate that's split
- // between registers and memory.
- unsigned getFirstByValReg() const { return FirstByValRegValid ? FirstByValReg : 0; }
- void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; }
- void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; }
- bool isFirstByValRegValid() const { return FirstByValRegValid; }
+ // Returns the number of byval arguments that are to be stored (even
+ // partly) in registers.
+ unsigned getInRegsParamsCount() const { return ByValRegs.size(); }
+
+ // Returns the number of byval in-regs arguments processed so far.
+ unsigned getInRegsParamsProceed() const { return InRegsParamsProceed; }
+
+ // Get information about the N-th byval parameter that is stored in registers.
+ // Here "InRegsParamRecordIndex" is N.
+ void getInRegsParamInfo(unsigned InRegsParamRecordIndex,
+ unsigned& BeginReg, unsigned& EndReg) const {
+ assert(InRegsParamRecordIndex < ByValRegs.size() &&
+ "Wrong ByVal parameter index");
+
+ const ByValInfo& info = ByValRegs[InRegsParamRecordIndex];
+ BeginReg = info.Begin;
+ EndReg = info.End;
+ }
+
+ // Add information about parameter that is kept in registers.
+ void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd) {
+ ByValRegs.push_back(ByValInfo(RegBegin, RegEnd));
+ }
+
+ // Moves either to the next byval parameter (excluding the "waste" record) or
+ // to the end of the collection.
+ // Returns false if the end has been reached.
+ bool nextInRegsParam() {
+ unsigned e = ByValRegs.size();
+ if (InRegsParamsProceed < e)
+ ++InRegsParamsProceed;
+ return InRegsParamsProceed < e;
+ }
+
+ // Clear byval registers tracking info.
+ void clearByValRegsInfo() {
+ InRegsParamsProceed = 0;
+ ByValRegs.clear();
+ }
ParmContext getCallOrPrologue() const { return CallOrPrologue; }
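Not part of the patch: a minimal sketch of how the new CCState interface above is meant to be used, following the "use-case" comment in ByValRegs (the caller function here is hypothetical; only the CCState calls are from the interface added by this commit).

  #include "llvm/CodeGen/CallingConvLower.h"
  using namespace llvm;

  static void lowerInRegsByValArgs(CCState &CCInfo) {
    // HandleByVal has already filled ByValRegs by this point; walk the records
    // one by one while lowering the corresponding byval arguments.
    while (CCInfo.getInRegsParamsProceed() < CCInfo.getInRegsParamsCount()) {
      unsigned RegBegin, RegEnd;
      CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProceed(), RegBegin, RegEnd);
      // ... emit the loads/stores for registers in [RegBegin, RegEnd) ...
      CCInfo.nextInRegsParam();  // advance InRegsParamsProceed to the next record
    }
  }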
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index f1d4ace922..75f4b96e3b 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
// No stack is used.
StackOffset = 0;
- clearFirstByValReg();
+ clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9475f1b5a0..0f7beb1e3b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1481,10 +1481,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// True if this byval aggregate will be split between registers
// and memory.
- if (CCInfo.isFirstByValRegValid()) {
+ unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
+ unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
+
+ if (CurByValIdx < ByValArgsCount) {
+
+ unsigned RegBegin, RegEnd;
+ CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
+
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned int i, j;
- for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
+ for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, MVT::i32);
SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
@@ -1493,11 +1500,15 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(j, Load));
}
- offset = ARM::R4 - CCInfo.getFirstByValReg();
- CCInfo.clearFirstByValReg();
+
+ // If the parameter size exceeds the register area, the "offset" value
+ // helps us calculate the stack slot for the remaining part properly.
+ offset = RegEnd - RegBegin;
+
+ CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() - 4*offset > 0) {
+ if (Flags.getByValSize() > 4*offset) {
unsigned LocMemOffset = VA.getLocMemOffset();
SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
@@ -1740,9 +1751,24 @@ ARMTargetLowering::HandleByVal(
assert((State->getCallOrPrologue() == Prologue ||
State->getCallOrPrologue() == Call) &&
"unhandled ParmContext");
- if ((!State->isFirstByValRegValid()) &&
- (!Subtarget->isAAPCS_ABI() || State->getNextStackOffset() == 0) &&
- (ARM::R0 <= reg) && (reg <= ARM::R3)) {
+
+ // For in-prologue parameter handling, we also introduce a stack offset
+ // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
+ // This behaviour is outside the AAPCS rules (5.5 Parameters Passing) for how
+ // NSAA should be evaluated (NSAA means "next stacked argument address").
+ // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
+ // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
+ unsigned NSAAOffset = State->getNextStackOffset();
+ if (State->getCallOrPrologue() != Call) {
+ for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
+ unsigned RB, RE;
+ State->getInRegsParamInfo(i, RB, RE);
+ assert(NSAAOffset >= (RE-RB)*4 &&
+ "Stack offset for byval regs doesn't introduced anymore?");
+ NSAAOffset -= (RE-RB)*4;
+ }
+ }
+ if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
if (Subtarget->isAAPCS_ABI() && Align > 4) {
unsigned AlignInRegs = Align / 4;
unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
@@ -1750,22 +1776,45 @@ ARMTargetLowering::HandleByVal(
reg = State->AllocateReg(GPRArgRegs, 4);
}
if (reg != 0) {
- State->setFirstByValReg(reg);
+ unsigned excess = 4 * (ARM::R4 - reg);
+
+ // Special case when NSAA != SP and the parameter size is greater than the
+ // size of all remaining GPRs. In that case we can't split the parameter;
+ // we must send it to the stack. We also must set NCRN to R4, thus wasting
+ // all remaining registers.
+ if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
+ while (State->AllocateReg(GPRArgRegs, 4))
+ ;
+ return;
+ }
+
+ // The first register for a byval parameter is the first register that wasn't
+ // allocated before this method call, so it would be "reg".
+ // If the parameter is small enough to be saved in the range [reg, r4), then
+ // the end (one past the last) register would be reg + param-size-in-regs;
+ // otherwise the parameter would be split between registers and stack,
+ // and the end register would be r4 in that case.
+ unsigned ByValRegBegin = reg;
+ unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4;
+ State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
+ // Note: the first register was already allocated at the beginning of this
+ // function; allocate the remaining registers we need.
+ for (unsigned i = reg+1; i != ByValRegEnd; ++i)
+ State->AllocateReg(GPRArgRegs, 4);
// At a call site, a byval parameter that is split between
// registers and memory needs its size truncated here. In a
// function prologue, such byval parameters are reassembled in
// memory, and are not truncated.
if (State->getCallOrPrologue() == Call) {
- unsigned excess = 4 * (ARM::R4 - reg);
- assert(size >= excess && "expected larger existing stack allocation");
- size -= excess;
+ // Make the remaining size equal to 0 in case
+ // the whole structure can be stored in registers.
+ if (size < excess)
+ size = 0;
+ else
+ size -= excess;
}
}
}
- // Confiscate any remaining parameter registers to preclude their
- // assignment to subsequent parameters.
- while (State->AllocateReg(GPRArgRegs, 4))
- ;
}
/// MatchingStackOffset - Return true if the given stack call argument is
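Not part of the patch: the register-range arithmetic that HandleByVal now performs, isolated into a small standalone sketch (plain integers 0..4 stand in for ARM::R0..ARM::R4; "reg" is the first unallocated GPR, "size" the byval size in bytes).

  #include <cassert>

  struct ByValSplit { unsigned Begin, End, StackBytes; };

  // Mirrors the "excess" / ByValRegEnd / size-truncation logic above: the part
  // of the parameter that fits in r0-r3 gets a register range; the remainder
  // (at a call site) stays on the stack.
  ByValSplit splitByVal(unsigned reg, unsigned size) {
    unsigned excess = 4 * (4 - reg);                // bytes still available in r0-r3
    unsigned End = (size < excess) ? reg + size / 4 : 4;
    unsigned StackBytes = (size < excess) ? 0 : size - excess;
    return {reg, End, StackBytes};
  }

  int main() {
    ByValSplit a = splitByVal(2, 4);                // 4-byte struct starting at r2
    assert(a.Begin == 2 && a.End == 3 && a.StackBytes == 0);
    ByValSplit b = splitByVal(2, 16);               // 16-byte struct starting at r2
    assert(b.Begin == 2 && b.End == 4 && b.StackBytes == 8);
    return 0;
  }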
@@ -2580,13 +2629,16 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
void
ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned InRegsParamRecordIdx,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize)
const {
unsigned NumGPRs;
- if (CCInfo.isFirstByValRegValid())
- NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
- else {
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ unsigned RBegin, REnd;
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ NumGPRs = REnd - RBegin;
+ } else {
unsigned int firstUnalloced;
firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
sizeof(GPRArgRegs) /
@@ -2611,6 +2663,7 @@ int
ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
bool ForceMutable) const {
@@ -2629,24 +2682,26 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- unsigned firstRegToSaveIndex;
- if (CCInfo.isFirstByValRegValid())
- firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
- else {
+ unsigned firstRegToSaveIndex, lastRegToSaveIndex;
+ unsigned RBegin, REnd;
+ if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
+ CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
+ firstRegToSaveIndex = RBegin - ARM::R0;
+ lastRegToSaveIndex = REnd - ARM::R0;
+ } else {
firstRegToSaveIndex = CCInfo.getFirstUnallocated
(GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+ lastRegToSaveIndex = 4;
}
unsigned ArgRegsSize, ArgRegsSaveSize;
- computeRegArea(CCInfo, MF, ArgRegsSize, ArgRegsSaveSize);
+ computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize);
// Store any by-val regs to their spots on the stack so that they may be
// loaded by deferencing the result of formal parameter pointer or va_next.
// Note: once stack area for byval/varargs registers
// was initialized, it can't be initialized again.
- if (!AFI->getArgRegsSaveSize() && ArgRegsSaveSize) {
-
- AFI->setArgRegsSaveSize(ArgRegsSaveSize);
+ if (ArgRegsSaveSize) {
int FrameIndex = MFI->CreateFixedObject(
ArgRegsSaveSize,
@@ -2655,7 +2710,8 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
- for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
+ for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
+ ++firstRegToSaveIndex, ++i) {
const TargetRegisterClass *RC;
if (AFI->isThumb1OnlyFunction())
RC = &ARM::tGPRRegClass;
@@ -2672,6 +2728,9 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
}
+
+ AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
+
if (!MemOps.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOps[0], MemOps.size());
@@ -2696,7 +2755,8 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// If there is no regs to be stored, just point address after last
// argument passed via stack.
int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, 0, 0, ArgOffset, ForceMutable);
+ StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
+ 0, ArgOffset, ForceMutable);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -2727,6 +2787,12 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue ArgValue;
Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
unsigned CurArgIdx = 0;
+
+ // Initially ArgRegsSaveSize is zero.
+ // Then we increase this value each time we meet a byval parameter.
+ // We also increase this value for a varargs function.
+ AFI->setArgRegsSaveSize(0);
+
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
@@ -2824,12 +2890,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// Since they could be overwritten by lowering of arguments in case of
// a tail call.
if (Flags.isByVal()) {
+ unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
int FrameIndex = StoreByValRegs(
- CCInfo, DAG, dl, Chain, CurOrigArg,
- Ins[VA.getValNo()].PartOffset,
- VA.getLocMemOffset(),
- true /*force mutable frames*/);
+ CCInfo, DAG, dl, Chain, CurOrigArg,
+ CurByValIndex,
+ Ins[VA.getValNo()].PartOffset,
+ VA.getLocMemOffset(),
+ true /*force mutable frames*/);
InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
+ CCInfo.nextInRegsParam();
} else {
int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
VA.getLocMemOffset(), true);
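Not part of the patch: a minimal model of the ArgRegsSaveSize bookkeeping introduced above. LowerFormalArguments now resets the counter once per function, and each StoreByValRegs call adds the register area it actually spills, so the counter ends up covering every in-register byval parameter (the counter type and byte counts below are illustrative only).

  #include <cassert>

  // Stand-in for the ARMFunctionInfo get/setArgRegsSaveSize accessors used above.
  struct ArgRegsSaveCounter {
    unsigned Size = 0;
    void set(unsigned S) { Size = S; }
    unsigned get() const { return Size; }
  };

  int main() {
    ArgRegsSaveCounter AFI;
    AFI.set(0);                  // LowerFormalArguments: reset once
    AFI.set(AFI.get() + 8);      // StoreByValRegs: first in-regs byval parameter
    AFI.set(AFI.get() + 8);      // StoreByValRegs: second in-regs byval parameter
    assert(AFI.get() == 16);     // accumulated register-save area for the prologue
    return 0;
  }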
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 46b8438676..426010e295 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -477,6 +477,7 @@ namespace llvm {
int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
DebugLoc dl, SDValue &Chain,
const Value *OrigArg,
+ unsigned InRegsParamRecordIdx,
unsigned OffsetFromOrigArg,
unsigned ArgOffset,
bool ForceMutable) const;
@@ -487,6 +488,7 @@ namespace llvm {
bool ForceMutable = false) const;
void computeRegArea(CCState &CCInfo, MachineFunction &MF,
+ unsigned InRegsParamRecordIdx,
unsigned &ArgRegsSize,
unsigned &ArgRegsSaveSize) const;
diff --git a/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
new file mode 100644
index 0000000000..4a5ca9db0e
--- /dev/null
+++ b/test/CodeGen/ARM/2013-04-05-Small-ByVal-Structs-PR15293.ll
@@ -0,0 +1,73 @@
+;PR15293: ARM codegen ice - expected larger existing stack allocation
+;RUN: llc -mtriple=arm-linux-gnueabihf < %s | FileCheck %s
+
+;CHECK: foo:
+;CHECK: sub sp, sp, #8
+;CHECK: push {r11, lr}
+;CHECK: str r0, [sp, #12]
+;CHECK: add r0, sp, #12
+;CHECK: bl fooUseParam
+;CHECK: pop {r11, lr}
+;CHECK: add sp, sp, #8
+;CHECK: mov pc, lr
+
+;CHECK: foo2:
+;CHECK: sub sp, sp, #16
+;CHECK: push {r11, lr}
+;CHECK: str r0, [sp, #12]
+;CHECK: add r0, sp, #12
+;CHECK: str r2, [sp, #16]
+;CHECK: bl fooUseParam
+;CHECK: add r0, sp, #16
+;CHECK: bl fooUseParam
+;CHECK: pop {r11, lr}
+;CHECK: add sp, sp, #16
+;CHECK: mov pc, lr
+
+;CHECK: doFoo:
+;CHECK: push {r11, lr}
+;CHECK: ldr r0,
+;CHECK: ldr r0, [r0]
+;CHECK: bl foo
+;CHECK: pop {r11, lr}
+;CHECK: mov pc, lr
+
+
+;CHECK: doFoo2:
+;CHECK: push {r11, lr}
+;CHECK: ldr r0,
+;CHECK: mov r1, #0
+;CHECK: ldr r0, [r0]
+;CHECK: mov r2, r0
+;CHECK: bl foo2
+;CHECK: pop {r11, lr}
+;CHECK: mov pc, lr
+
+
+%artz = type { i32 }
+@static_val = constant %artz { i32 777 }
+
+declare void @fooUseParam(%artz* )
+
+define void @foo(%artz* byval %s) {
+ call void @fooUseParam(%artz* %s)
+ ret void
+}
+
+define void @foo2(%artz* byval %s, i32 %p, %artz* byval %s2) {
+ call void @fooUseParam(%artz* %s)
+ call void @fooUseParam(%artz* %s2)
+ ret void
+}
+
+
+define void @doFoo() {
+ call void @foo(%artz* byval @static_val)
+ ret void
+}
+
+define void @doFoo2() {
+ call void @foo2(%artz* byval @static_val, i32 0, %artz* byval @static_val)
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
new file mode 100644
index 0000000000..6db71fed95
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
@@ -0,0 +1,48 @@
+;Check AAPCS, 5.5 Parameters Passing, C4 and C5 rules.
+;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize < R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888}
+
+declare void @fooUseStruct(%st_t*)
+
+define void @foo(double %vfp0, ; --> D0, NSAA=SP
+ double %vfp1, ; --> D1, NSAA=SP
+ double %vfp2, ; --> D2, NSAA=SP
+ double %vfp3, ; --> D3, NSAA=SP
+ double %vfp4, ; --> D4, NSAA=SP
+ double %vfp5, ; --> D5, NSAA=SP
+ double %vfp6, ; --> D6, NSAA=SP
+ double %vfp7, ; --> D7, NSAA=SP
+ double %vfp8, ; --> SP, NSAA=SP+8 (!)
+ i32 %p0, ; --> R0, NSAA=SP+8
+ %st_t* byval %p1, ; --> R1, R2, NSAA=SP+8
+ i32 %p2, ; --> R3, NSAA=SP+8
+ i32 %p3) #0 { ; --> SP+4, NSAA=SP+12
+entry:
+ ;CHECK: sub sp, #8
+ ;CHECK: push.w {r11, lr}
+ ;CHECK: add r0, sp, #16
+ ;CHECK: str r2, [sp, #20]
+ ;CHECK: str r1, [sp, #16]
+ ;CHECK: bl fooUseStruct
+ call void @fooUseStruct(%st_t* %p1)
+ ret void
+}
+
+define void @doFoo() {
+entry:
+ call void @foo(double 23.0,
+ double 23.1,
+ double 23.2,
+ double 23.3,
+ double 23.4,
+ double 23.5,
+ double 23.6,
+ double 23.7,
+ double 23.8,
+ i32 0, %st_t* byval @static_val, i32 1, i32 2)
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
new file mode 100644
index 0000000000..212bbc2ee9
--- /dev/null
+++ b/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP2.ll
@@ -0,0 +1,45 @@
+;Check AAPCS, 5.5 Parameters Passing, C4 and C5 rules.
+;Check case when NSAA != 0, and NCRN < R4, NCRN+ParamSize > R4
+;RUN: llc -mtriple=thumbv7-linux-gnueabihf -float-abi=hard < %s | FileCheck %s
+
+%st_t = type { i32, i32, i32, i32 }
+@static_val = constant %st_t { i32 777, i32 888, i32 787, i32 878}
+
+define void @foo(double %vfp0, ; --> D0, NSAA=SP
+ double %vfp1, ; --> D1, NSAA=SP
+ double %vfp2, ; --> D2, NSAA=SP
+ double %vfp3, ; --> D3, NSAA=SP
+ double %vfp4, ; --> D4, NSAA=SP
+ double %vfp5, ; --> D5, NSAA=SP
+ double %vfp6, ; --> D6, NSAA=SP
+ double %vfp7, ; --> D7, NSAA=SP
+ double %vfp8, ; --> SP, NSAA=SP+8 (!)
+ i32 %p0, ; --> R0, NSAA=SP+8
+ %st_t* byval %p1, ; --> SP+8, 4 words NSAA=SP+24
+ i32 %p2) #0 { ; --> SP+24, NSAA=SP+24
+
+entry:
+ ;CHECK: push.w {r11, lr}
+ ;CHECK: ldr r0, [sp, #32]
+ ;CHECK: bl fooUseI32
+ call void @fooUseI32(i32 %p2)
+ ret void
+}
+
+declare void @fooUseI32(i32)
+
+define void @doFoo() {
+entry:
+ call void @foo(double 23.0,
+ double 23.1,
+ double 23.2,
+ double 23.3,
+ double 23.4,
+ double 23.5,
+ double 23.6,
+ double 23.7,
+ double 23.8,
+ i32 0, %st_t* byval @static_val, i32 1)
+ ret void
+}
+