ARM ABI: fix ABI alignment issues when passing legal vector types as varargs.

We create an aligned temporary space and copy the content over from ap.cur to the temporary space. This is necessary if the natural alignment of the type is greater than the ABI alignment. rdar://12439123 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@166040 91177308-0d34-0410-b5e6-96231b3b80d8
author: Manman Ren <mren@apple.com> 2012-10-16 19:01:37 +0000
committer: Manman Ren <mren@apple.com> 2012-10-16 19:01:37 +0000
commit: d105e73368e677e65af724947be85ec87a0fa45e (patch)
tree: 64ffbfbde10a539b068c0b61560172eedeff434f
parent: 9b29f4fe3d0600edf6ba00d48f2d4f2b1984f247 (diff)
2 files changed, 76 insertions, 6 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index 75cc5a37a5..b43d33198f 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -3169,27 +3169,59 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
   CGBuilderTy &Builder = CGF.Builder;
   llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
   llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
-  // Handle address alignment for type alignment > 32 bits
+
+  uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
   uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+
+  // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
+  // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
+  if (Ty->getAs<VectorType>()) {
+    if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
+        getABIKind() == ARMABIInfo::AAPCS)
+      TyAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
+    else
+      TyAlign = 4;
+  }
+
+  // Handle address alignment for ABI alignment > 4 bytes.
   if (TyAlign > 4) {
     assert((TyAlign & (TyAlign - 1)) == 0 &&
            "Alignment is not power of 2!");
     llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int32Ty);
     AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt32(TyAlign - 1));
     AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt32(~(TyAlign - 1)));
-    Addr = Builder.CreateIntToPtr(AddrAsInt, BP);
+    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
   }
-  llvm::Type *PTy =
-    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
-  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
 
   uint64_t Offset =
-    llvm::RoundUpToAlignment(CGF.getContext().getTypeSize(Ty) / 8, 4);
+    llvm::RoundUpToAlignment(Size, 4);
   llvm::Value *NextAddr =
     Builder.CreateGEP(Addr, llvm::ConstantInt::get(CGF.Int32Ty, Offset),
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
 
+  if (Ty->getAs<VectorType>() &&
+      (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
+    // We can't directly cast ap.cur to pointer to a vector type, since ap.cur
+    // may not be correctly aligned for the vector type. We create an aligned
+    // temporary space and copy the content over from ap.cur to the temporary
+    // space. This is necessary if the natural alignment of the type is greater
+    // than the ABI alignment.
+    llvm::Type *I8PtrTy = Builder.getInt8PtrTy();
+    CharUnits CharSize = getContext().getTypeSizeInChars(Ty);
+    llvm::Value *AlignedTemp = CGF.CreateTempAlloca(CGF.ConvertType(Ty),
+                                                    "var.align");
+    llvm::Value *Dst = Builder.CreateBitCast(AlignedTemp, I8PtrTy);
+    llvm::Value *Src = Builder.CreateBitCast(Addr, I8PtrTy);
+    Builder.CreateMemCpy(Dst, Src,
+        llvm::ConstantInt::get(CGF.IntPtrTy, CharSize.getQuantity()),
+        TyAlign, false);
+    Addr = AlignedTemp; //The content is in aligned location.
+  }
+  llvm::Type *PTy =
+    llvm::PointerType::getUnqual(CGF.ConvertType(Ty));
+  llvm::Value *AddrTyped = Builder.CreateBitCast(Addr, PTy);
+
   return AddrTyped;
 }
 
diff --git a/test/CodeGen/arm-abi-vector.c b/test/CodeGen/arm-abi-vector.c
new file mode 100644
index 0000000000..fee41b8514
--- /dev/null
+++ b/test/CodeGen/arm-abi-vector.c
@@ -0,0 +1,38 @@
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi aapcs -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple armv7-apple-darwin -target-abi apcs-gnu -emit-llvm -o - %s | FileCheck -check-prefix=APCS-GNU %s
+
+#include <stdarg.h>
+
+typedef __attribute__(( ext_vector_type(2) ))  int __int2;
+
+// Passing legal vector types as varargs.
+double varargs_vec_2i(int fixed, ...) {
+// CHECK: varargs_vec_2i
+// CHECK: %c3 = alloca <2 x i32>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <2 x i32>*
+// APCS-GNU: varargs_vec_2i
+// APCS-GNU: %c3 = alloca <2 x i32>, align 8
+// APCS-GNU: %var.align = alloca <2 x i32>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <2 x i32>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <2 x i32>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __int2 c3 = va_arg(ap, __int2);
+  sum = sum + c3.x + c3.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_2i(__int2 *in) {
+// CHECK: test_2i
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+// APCS-GNU: test_2i
+// APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
+  return varargs_vec_2i(3, *in);
+}
author	Manman Ren <mren@apple.com>	2012-10-16 19:01:37 +0000
committer	Manman Ren <mren@apple.com>	2012-10-16 19:01:37 +0000
commit	d105e73368e677e65af724947be85ec87a0fa45e (patch)
tree	64ffbfbde10a539b068c0b61560172eedeff434f
parent	9b29f4fe3d0600edf6ba00d48f2d4f2b1984f247 (diff)