diff options
-rw-r--r-- | lib/CodeGen/TargetInfo.cpp | 50 | ||||
-rw-r--r-- | test/CodeGen/arm-abi-vector.c | 190 |
2 files changed, 238 insertions, 2 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index b43d33198f..24be05fb85 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -2799,6 +2799,7 @@ private: ABIArgInfo classifyReturnType(QualType RetTy) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; + bool isIllegalVectorType(QualType Ty) const; virtual void computeInfo(CGFunctionInfo &FI) const; @@ -2945,6 +2946,27 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base, } ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const { + // Handle illegal vector types here. + if (isIllegalVectorType(Ty)) { + uint64_t Size = getContext().getTypeSize(Ty); + if (Size <= 32) { + llvm::Type *ResType = + llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64) { + llvm::Type *ResType = llvm::VectorType::get( + llvm::Type::getInt32Ty(getVMContext()), 2); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 128) { + llvm::Type *ResType = llvm::VectorType::get( + llvm::Type::getInt32Ty(getVMContext()), 4); + return ABIArgInfo::getDirect(ResType); + } + return ABIArgInfo::getIndirect(0, /*ByVal=*/false); + } + if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. if (const EnumType *EnumTy = Ty->getAs<EnumType>()) @@ -3161,6 +3183,21 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIndirect(0); } +/// isIllegalVectorType - check whether Ty is an illegal vector type. +bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { + if (const VectorType *VT = Ty->getAs<VectorType>()) { + // Check whether VT is legal. + unsigned NumElements = VT->getNumElements(); + uint64_t Size = getContext().getTypeSize(VT); + // NumElements should be power of 2. + if ((NumElements & (NumElements - 1)) != 0) + return true; + // Size should be greater than 32 bits.
+ return Size <= 32; + } + return false; +} + llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, CodeGenFunction &CGF) const { llvm::Type *BP = CGF.Int8PtrTy; @@ -3172,6 +3209,7 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8; uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8; + bool IsIndirect = false; // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte. @@ -3182,6 +3220,12 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, else TyAlign = 4; } + // Use indirect if size of the illegal vector is bigger than 16 bytes. + if (isIllegalVectorType(Ty) && Size > 16) { + IsIndirect = true; + Size = 4; + TyAlign = 4; + } // Handle address alignment for ABI alignment > 4 bytes. if (TyAlign > 4) { @@ -3200,8 +3244,10 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, "ap.next"); Builder.CreateStore(NextAddr, VAListAddrAsBPP); - if (Ty->getAs<VectorType>() && - (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) { + if (IsIndirect) + Addr = Builder.CreateLoad(Builder.CreateBitCast(Addr, BPP)); + else if (Ty->getAs<VectorType>() && + (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) { // We can't directly cast ap.cur to pointer to a vector type, since ap.cur // may not be correctly aligned for the vector type. 
We create an aligned // temporary space and copy the content over from ap.cur to the temporary diff --git a/test/CodeGen/arm-abi-vector.c b/test/CodeGen/arm-abi-vector.c index fee41b8514..296359d8ea 100644 --- a/test/CodeGen/arm-abi-vector.c +++ b/test/CodeGen/arm-abi-vector.c @@ -4,6 +4,12 @@ #include <stdarg.h> typedef __attribute__(( ext_vector_type(2) )) int __int2; +typedef __attribute__(( ext_vector_type(3) )) char __char3; +typedef __attribute__(( ext_vector_type(5) )) char __char5; +typedef __attribute__(( ext_vector_type(9) )) char __char9; +typedef __attribute__(( ext_vector_type(19) )) char __char19; +typedef __attribute__(( ext_vector_type(3) )) short __short3; +typedef __attribute__(( ext_vector_type(5) )) short __short5; // Passing legal vector types as varargs. double varargs_vec_2i(int fixed, ...) { @@ -36,3 +42,187 @@ double test_2i(__int2 *in) { // APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1) return varargs_vec_2i(3, *in); } + +double varargs_vec_3c(int fixed, ...) { +// CHECK: varargs_vec_3c +// CHECK: %c3 = alloca <3 x i8>, align 4 +// CHECK: %ap.next = getelementptr i8* %ap.cur, i32 4 +// CHECK: %1 = bitcast i8* %ap.cur to <3 x i8>* +// APCS-GNU: varargs_vec_3c +// APCS-GNU: %c3 = alloca <3 x i8>, align 4 +// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 4 +// APCS-GNU: bitcast i8* %ap.cur to <3 x i8>* + va_list ap; + double sum = fixed; + va_start(ap, fixed); + __char3 c3 = va_arg(ap, __char3); + sum = sum + c3.x + c3.y; + va_end(ap); + return sum; +} + +double test_3c(__char3 *in) { +// CHECK: test_3c +// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_3c(i32 3, i32 %2) +// APCS-GNU: test_3c +// APCS-GNU: call double (i32, ...)* @varargs_vec_3c(i32 3, i32 %2) + return varargs_vec_3c(3, *in); +} + +double varargs_vec_5c(int fixed, ...) 
{ +// CHECK: varargs_vec_5c +// CHECK: %c5 = alloca <5 x i8>, align 8 +// CHECK: %3 = and i32 %2, -8 +// CHECK: %ap.align = inttoptr i32 %3 to i8* +// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8 +// CHECK: bitcast i8* %ap.align to <5 x i8>* +// APCS-GNU: varargs_vec_5c +// APCS-GNU: %c5 = alloca <5 x i8>, align 8 +// APCS-GNU: %var.align = alloca <5 x i8> +// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8 +// APCS-GNU: %1 = bitcast <5 x i8>* %var.align to i8* +// APCS-GNU: call void @llvm.memcpy +// APCS-GNU: %2 = load <5 x i8>* %var.align + va_list ap; + double sum = fixed; + va_start(ap, fixed); + __char5 c5 = va_arg(ap, __char5); + sum = sum + c5.x + c5.y; + va_end(ap); + return sum; +} + +double test_5c(__char5 *in) { +// CHECK: test_5c +// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_5c(i32 5, <2 x i32> %3) +// APCS-GNU: test_5c +// APCS-GNU: call double (i32, ...)* @varargs_vec_5c(i32 5, <2 x i32> %3) + return varargs_vec_5c(5, *in); +} + +double varargs_vec_9c(int fixed, ...) 
{ +// CHECK: varargs_vec_9c +// CHECK: %c9 = alloca <9 x i8>, align 16 +// CHECK: %var.align = alloca <9 x i8> +// CHECK: %3 = and i32 %2, -8 +// CHECK: %ap.align = inttoptr i32 %3 to i8* +// CHECK: %ap.next = getelementptr i8* %ap.align, i32 16 +// CHECK: %4 = bitcast <9 x i8>* %var.align to i8* +// CHECK: call void @llvm.memcpy +// CHECK: %5 = load <9 x i8>* %var.align +// APCS-GNU: varargs_vec_9c +// APCS-GNU: %c9 = alloca <9 x i8>, align 16 +// APCS-GNU: %var.align = alloca <9 x i8> +// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 16 +// APCS-GNU: %1 = bitcast <9 x i8>* %var.align to i8* +// APCS-GNU: call void @llvm.memcpy +// APCS-GNU: %2 = load <9 x i8>* %var.align + va_list ap; + double sum = fixed; + va_start(ap, fixed); + __char9 c9 = va_arg(ap, __char9); + sum = sum + c9.x + c9.y; + va_end(ap); + return sum; +} + +double test_9c(__char9 *in) { +// CHECK: test_9c +// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_9c(i32 9, <4 x i32> %3) +// APCS-GNU: test_9c +// APCS-GNU: call double (i32, ...)* @varargs_vec_9c(i32 9, <4 x i32> %3) + return varargs_vec_9c(9, *in); +} + +double varargs_vec_19c(int fixed, ...) 
{ +// CHECK: varargs_vec_19c +// CHECK: %ap.next = getelementptr i8* %ap.cur, i32 4 +// CHECK: %1 = bitcast i8* %ap.cur to i8** +// CHECK: %2 = load i8** %1 +// CHECK: bitcast i8* %2 to <19 x i8>* +// APCS-GNU: varargs_vec_19c +// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 4 +// APCS-GNU: %1 = bitcast i8* %ap.cur to i8** +// APCS-GNU: %2 = load i8** %1 +// APCS-GNU: bitcast i8* %2 to <19 x i8>* + va_list ap; + double sum = fixed; + va_start(ap, fixed); + __char19 c19 = va_arg(ap, __char19); + sum = sum + c19.x + c19.y; + va_end(ap); + return sum; +} + +double test_19c(__char19 *in) { +// CHECK: test_19c +// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_19c(i32 19, <19 x i8>* %tmp) +// APCS-GNU: test_19c +// APCS-GNU: call double (i32, ...)* @varargs_vec_19c(i32 19, <19 x i8>* %tmp) + return varargs_vec_19c(19, *in); +} + +double varargs_vec_3s(int fixed, ...) { +// CHECK: varargs_vec_3s +// CHECK: %c3 = alloca <3 x i16>, align 8 +// CHECK: %3 = and i32 %2, -8 +// CHECK: %ap.align = inttoptr i32 %3 to i8* +// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8 +// CHECK: bitcast i8* %ap.align to <3 x i16>* +// APCS-GNU: varargs_vec_3s +// APCS-GNU: %c3 = alloca <3 x i16>, align 8 +// APCS-GNU: %var.align = alloca <3 x i16> +// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8 +// APCS-GNU: %1 = bitcast <3 x i16>* %var.align to i8* +// APCS-GNU: call void @llvm.memcpy +// APCS-GNU: %2 = load <3 x i16>* %var.align + va_list ap; + double sum = fixed; + va_start(ap, fixed); + __short3 c3 = va_arg(ap, __short3); + sum = sum + c3.x + c3.y; + va_end(ap); + return sum; +} + +double test_3s(__short3 *in) { +// CHECK: test_3s +// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_3s(i32 3, <2 x i32> %2) +// APCS-GNU: test_3s +// APCS-GNU: call double (i32, ...)* @varargs_vec_3s(i32 3, <2 x i32> %2) + return varargs_vec_3s(3, *in); +} + +double varargs_vec_5s(int fixed, ...) 
{ +// CHECK: varargs_vec_5s +// CHECK: %c5 = alloca <5 x i16>, align 16 +// CHECK: %var.align = alloca <5 x i16> +// CHECK: %3 = and i32 %2, -8 +// CHECK: %ap.align = inttoptr i32 %3 to i8* +// CHECK: %ap.next = getelementptr i8* %ap.align, i32 16 +// CHECK: %4 = bitcast <5 x i16>* %var.align to i8* +// CHECK: call void @llvm.memcpy +// CHECK: %5 = load <5 x i16>* %var.align +// APCS-GNU: varargs_vec_5s +// APCS-GNU: %c5 = alloca <5 x i16>, align 16 +// APCS-GNU: %var.align = alloca <5 x i16> +// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 16 +// APCS-GNU: %1 = bitcast <5 x i16>* %var.align to i8* +// APCS-GNU: call void @llvm.memcpy +// APCS-GNU: %2 = load <5 x i16>* %var.align + va_list ap; + double sum = fixed; + va_start(ap, fixed); + __short5 c5 = va_arg(ap, __short5); + sum = sum + c5.x + c5.y; + va_end(ap); + return sum; +} + +double test_5s(__short5 *in) { +// CHECK: test_5s +// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_5s(i32 5, <4 x i32> %3) +// APCS-GNU: test_5s +// APCS-GNU: call double (i32, ...)* @varargs_vec_5s(i32 5, <4 x i32> %3) + return varargs_vec_5s(5, *in); +} |