2 files changed, 238 insertions, 2 deletions
diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp
index b43d33198f..24be05fb85 100644
--- a/lib/CodeGen/TargetInfo.cpp
+++ b/lib/CodeGen/TargetInfo.cpp
@@ -2799,6 +2799,7 @@ private:
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const;
 
@@ -2945,6 +2946,27 @@ static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
 }
 
 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
+  // Handle illegal vector types here.
+  if (isIllegalVectorType(Ty)) {
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (Size <= 32) {
+      llvm::Type *ResType =
+          llvm::Type::getInt32Ty(getVMContext());
+      return ABIArgInfo::getDirect(ResType);
+    }
+    if (Size == 64) {
+      llvm::Type *ResType = llvm::VectorType::get(
+          llvm::Type::getInt32Ty(getVMContext()), 2);
+      return ABIArgInfo::getDirect(ResType);
+    }
+    if (Size == 128) {
+      llvm::Type *ResType = llvm::VectorType::get(
+          llvm::Type::getInt32Ty(getVMContext()), 4);
+      return ABIArgInfo::getDirect(ResType);
+    }
+    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+  }
+
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
@@ -3161,6 +3183,21 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy) const {
   return ABIArgInfo::getIndirect(0);
 }
 
+/// isIllegalVector - check whether Ty is an illegal vector type.
+bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    // Check whether VT is legal.
+    unsigned NumElements = VT->getNumElements();
+    uint64_t Size = getContext().getTypeSize(VT);
+    // NumElements should be power of 2.
+    if ((NumElements & (NumElements - 1)) != 0)
+      return true;
+    // Size should be greater than 32 bits.
+    return Size <= 32;
+  }
+  return false;
+}
+
 llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGenFunction &CGF) const {
   llvm::Type *BP = CGF.Int8PtrTy;
@@ -3172,6 +3209,7 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
 
   uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
   uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+  bool IsIndirect = false;
 
   // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
   // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
@@ -3182,6 +3220,12 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
     else
       TyAlign = 4;
   }
+  // Use indirect if size of the illegal vector is bigger than 16 bytes.
+  if (isIllegalVectorType(Ty) && Size > 16) {
+    IsIndirect = true;
+    Size = 4;
+    TyAlign = 4;
+  }
 
   // Handle address alignment for ABI alignment > 4 bytes.
   if (TyAlign > 4) {
@@ -3200,8 +3244,10 @@ llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
 
-  if (Ty->getAs<VectorType>() &&
-      (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
+  if (IsIndirect)
+    Addr = Builder.CreateLoad(Builder.CreateBitCast(Addr, BPP));
+  else if (Ty->getAs<VectorType>() &&
+           (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
     // We can't directly cast ap.cur to pointer to a vector type, since ap.cur
     // may not be correctly aligned for the vector type. We create an aligned
     // temporary space and copy the content over from ap.cur to the temporary
diff --git a/test/CodeGen/arm-abi-vector.c b/test/CodeGen/arm-abi-vector.c
index fee41b8514..296359d8ea 100644
--- a/test/CodeGen/arm-abi-vector.c
+++ b/test/CodeGen/arm-abi-vector.c
@@ -4,6 +4,12 @@
 #include <stdarg.h>
 
 typedef __attribute__(( ext_vector_type(2) ))  int __int2;
+typedef __attribute__(( ext_vector_type(3) ))  char __char3;
+typedef __attribute__(( ext_vector_type(5) ))  char __char5;
+typedef __attribute__(( ext_vector_type(9) ))  char __char9;
+typedef __attribute__(( ext_vector_type(19) )) char __char19;
+typedef __attribute__(( ext_vector_type(3) ))  short __short3;
+typedef __attribute__(( ext_vector_type(5) ))  short __short5;
 
 // Passing legal vector types as varargs.
 double varargs_vec_2i(int fixed, ...) {
@@ -36,3 +42,187 @@ double test_2i(__int2 *in) {
 // APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
   return varargs_vec_2i(3, *in);
 }
+
+double varargs_vec_3c(int fixed, ...) {
+// CHECK: varargs_vec_3c
+// CHECK: %c3 = alloca <3 x i8>, align 4
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i32 4
+// CHECK: %1 = bitcast i8* %ap.cur to <3 x i8>*
+// APCS-GNU: varargs_vec_3c
+// APCS-GNU: %c3 = alloca <3 x i8>, align 4
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 4
+// APCS-GNU: bitcast i8* %ap.cur to <3 x i8>*
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char3 c3 = va_arg(ap, __char3);
+  sum = sum + c3.x + c3.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_3c(__char3 *in) {
+// CHECK: test_3c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_3c(i32 3, i32 %2)
+// APCS-GNU: test_3c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_3c(i32 3, i32 %2)
+  return varargs_vec_3c(3, *in);
+}
+
+double varargs_vec_5c(int fixed, ...) {
+// CHECK: varargs_vec_5c
+// CHECK: %c5 = alloca <5 x i8>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <5 x i8>*
+// APCS-GNU: varargs_vec_5c
+// APCS-GNU: %c5 = alloca <5 x i8>, align 8
+// APCS-GNU: %var.align = alloca <5 x i8>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <5 x i8>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <5 x i8>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char5 c5 = va_arg(ap, __char5);
+  sum = sum + c5.x + c5.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_5c(__char5 *in) {
+// CHECK: test_5c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_5c(i32 5, <2 x i32> %3)
+// APCS-GNU: test_5c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_5c(i32 5, <2 x i32> %3)
+  return varargs_vec_5c(5, *in);
+}
+
+double varargs_vec_9c(int fixed, ...) {
+// CHECK: varargs_vec_9c
+// CHECK: %c9 = alloca <9 x i8>, align 16
+// CHECK: %var.align = alloca <9 x i8>
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 16
+// CHECK: %4 = bitcast <9 x i8>* %var.align to i8*
+// CHECK: call void @llvm.memcpy
+// CHECK: %5 = load <9 x i8>* %var.align
+// APCS-GNU: varargs_vec_9c
+// APCS-GNU: %c9 = alloca <9 x i8>, align 16
+// APCS-GNU: %var.align = alloca <9 x i8>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 16
+// APCS-GNU: %1 = bitcast <9 x i8>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <9 x i8>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char9 c9 = va_arg(ap, __char9);
+  sum = sum + c9.x + c9.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_9c(__char9 *in) {
+// CHECK: test_9c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_9c(i32 9, <4 x i32> %3)
+// APCS-GNU: test_9c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_9c(i32 9, <4 x i32> %3)
+  return varargs_vec_9c(9, *in);
+}
+
+double varargs_vec_19c(int fixed, ...) {
+// CHECK: varargs_vec_19c
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i32 4
+// CHECK: %1 = bitcast i8* %ap.cur to i8**
+// CHECK: %2 = load i8** %1
+// CHECK: bitcast i8* %2 to <19 x i8>*
+// APCS-GNU: varargs_vec_19c
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 4
+// APCS-GNU: %1 = bitcast i8* %ap.cur to i8**
+// APCS-GNU: %2 = load i8** %1
+// APCS-GNU: bitcast i8* %2 to <19 x i8>*
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char19 c19 = va_arg(ap, __char19);
+  sum = sum + c19.x + c19.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_19c(__char19 *in) {
+// CHECK: test_19c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_19c(i32 19, <19 x i8>* %tmp)
+// APCS-GNU: test_19c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_19c(i32 19, <19 x i8>* %tmp)
+  return varargs_vec_19c(19, *in);
+}
+
+double varargs_vec_3s(int fixed, ...) {
+// CHECK: varargs_vec_3s
+// CHECK: %c3 = alloca <3 x i16>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <3 x i16>*
+// APCS-GNU: varargs_vec_3s
+// APCS-GNU: %c3 = alloca <3 x i16>, align 8
+// APCS-GNU: %var.align = alloca <3 x i16>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <3 x i16>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <3 x i16>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __short3 c3 = va_arg(ap, __short3);
+  sum = sum + c3.x + c3.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_3s(__short3 *in) {
+// CHECK: test_3s
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_3s(i32 3, <2 x i32> %2)
+// APCS-GNU: test_3s
+// APCS-GNU: call double (i32, ...)* @varargs_vec_3s(i32 3, <2 x i32> %2)
+  return varargs_vec_3s(3, *in);
+}
+
+double varargs_vec_5s(int fixed, ...) {
+// CHECK: varargs_vec_5s
+// CHECK: %c5 = alloca <5 x i16>, align 16
+// CHECK: %var.align = alloca <5 x i16>
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 16
+// CHECK: %4 = bitcast <5 x i16>* %var.align to i8*
+// CHECK: call void @llvm.memcpy
+// CHECK: %5 = load <5 x i16>* %var.align
+// APCS-GNU: varargs_vec_5s
+// APCS-GNU: %c5 = alloca <5 x i16>, align 16
+// APCS-GNU: %var.align = alloca <5 x i16>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 16
+// APCS-GNU: %1 = bitcast <5 x i16>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <5 x i16>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __short5 c5 = va_arg(ap, __short5);
+  sum = sum + c5.x + c5.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_5s(__short5 *in) {
+// CHECK: test_5s
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_5s(i32 5, <4 x i32> %3)
+// APCS-GNU: test_5s
+// APCS-GNU: call double (i32, ...)* @varargs_vec_5s(i32 5, <4 x i32> %3)
+  return varargs_vec_5s(5, *in);
+}