diff options
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 9 | ||||
-rw-r--r-- | test/CodeGen/arm-arguments.c | 32 |
2 files changed, 36 insertions, 5 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 9060337180..22292603f6 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -3221,16 +3221,15 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, int *VFPRegs, // Support byval for ARM. // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at most 8-byte. - // Byval can't handle the case where type alignment is bigger than ABI alignment. - // We also increase the threshold for byval due to its overhead. + // We realign the indirect argument if type alignment is bigger than ABI alignment. uint64_t ABIAlign = 4; uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8; if (getABIKind() == ARMABIInfo::AAPCS_VFP || getABIKind() == ARMABIInfo::AAPCS) ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8); - if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64*8) && - TyAlign <= ABIAlign) { - return ABIArgInfo::getIndirect(0, /*ByVal=*/true); + if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) { + return ABIArgInfo::getIndirect(0, /*ByVal=*/true, + /*Realign=*/TyAlign <= ABIAlign ? false : true); } // Otherwise, pass by coercing to a structure of the appropriate size. diff --git a/test/CodeGen/arm-arguments.c b/test/CodeGen/arm-arguments.c index 8aa33f696d..63ecd4c599 100644 --- a/test/CodeGen/arm-arguments.c +++ b/test/CodeGen/arm-arguments.c @@ -191,3 +191,35 @@ void g34(struct s34 *s) { f34(*s); } // AAPCS: %[[a:.*]] = alloca { [1 x i32] } // AAPCS: %[[gep:.*]] = getelementptr { [1 x i32] }* %[[a]], i32 0, i32 0 // AAPCS: load [1 x i32]* %[[gep]] + +// rdar://12596507 +struct s35 +{ + float v[18]; //make sure byval is on. +} __attribute__((aligned(16))); +typedef struct s35 s35_with_align; + +typedef __attribute__((neon_vector_type(4))) float float32x4_t; +static __attribute__((__always_inline__, __nodebug__)) float32x4_t vaddq_f32( + float32x4_t __a, float32x4_t __b) { + return __a + __b; +} +float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) { + float32x4_t v = vaddq_f32(*(float32x4_t *)&s1, + *(float32x4_t *)&s2); + return v; +} +// APCS-GNU: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval) +// APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16 +// APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* +// APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8* +// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] +// APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* +// APCS-GNU: load <4 x float>* %[[d]], align 16 +// AAPCS: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval) +// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16 +// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* +// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8* +// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] +// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* +// AAPCS: load <4 x float>* %[[d]], align 16 |