author     Chris Lattner <sabre@nondot.org>    2010-06-28 23:44:11 +0000
committer  Chris Lattner <sabre@nondot.org>    2010-06-28 23:44:11 +0000
commit     ce70016434ff82a29a60ef82894d934b8a23f23d
tree       42e0fc3518769095627c301d2d2d2c0e2456cc4e /lib/CodeGen/CGCall.cpp
parent     bc55618eb97c519b8c186c350cb419f89021b073
Change CGCall to handle the "coerce" case where the coerce-to type
is an FCA by passing each of the elements as individual scalars. This
produces code that fast-isel is less likely to reject and that is easier
on the optimizers.
For example, before we would compile:
struct DeclGroup { long NumDecls; char * Y; };
char * foo(DeclGroup D) {
  return D.NumDecls + D.Y;
}
to:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(%struct.DeclGroup) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
  store %struct.DeclGroup %0, %struct.DeclGroup* %D, align 1
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
  %tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
  %add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
  ret i64 %add
}
Now we get:
%0 = type { i64, i64 }
%struct.DeclGroup = type { i64, i8* }
define i8* @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
  %2 = insertvalue %0 undef, i64 %0, 0 ; <%0> [#uses=1]
  %3 = insertvalue %0 %2, i64 %1, 1 ; <%0> [#uses=1]
  %4 = bitcast %struct.DeclGroup* %D to %0* ; <%0*> [#uses=1]
  store %0 %3, %0* %4, align 1
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i8**> [#uses=1]
  %tmp3 = load i8** %tmp2 ; <i8*> [#uses=1]
  %add.ptr = getelementptr inbounds i8* %tmp3, i64 %tmp1 ; <i8*> [#uses=1]
  ret i8* %add.ptr
}
Elimination of the FCA inside the function is still to come.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@107099 91177308-0d34-0410-b5e6-96231b3b80d8
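For readers following the patch below, the caller-side half of the change reduces to the small loop sketched here. This is only an illustrative sketch, not the committed code: the free function flattenCoercedArg and its exact signature are invented for the example, and the include paths assume a present-day LLVM tree (the 2010 tree kept these headers elsewhere). The IRBuilder calls, however, are the same ones the patch uses inside CodeGenFunction::EmitCall.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"

// Hypothetical helper, invented for illustration; in the patch this logic is
// written inline in CodeGenFunction::EmitCall.  Flatten a value that was
// coerced to a first-class aggregate (FCA) into one scalar call argument per
// struct element; non-struct coercions are passed through unchanged.
static void flattenCoercedArg(llvm::IRBuilder<> &Builder, llvm::Value *SrcVal,
                              llvm::SmallVectorImpl<llvm::Value *> &Args) {
  if (llvm::StructType *STy =
          llvm::dyn_cast<llvm::StructType>(SrcVal->getType())) {
    // One extractvalue per element, e.g. %e0 = extractvalue {i64, i64} %v, 0.
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
      Args.push_back(Builder.CreateExtractValue(SrcVal, i));
  } else {
    // Not an aggregate: pass the coerced value as-is.
    Args.push_back(SrcVal);
  }
}

Passing one scalar per element is what turns the single %struct.DeclGroup parameter in the "before" IR into the two i64 parameters in the "after" IR.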
Diffstat (limited to 'lib/CodeGen/CGCall.cpp')
-rw-r--r--  lib/CodeGen/CGCall.cpp | 71
1 file changed, 60 insertions(+), 11 deletions(-)
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 9719dfa432..eb517edd81 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -240,7 +240,8 @@ const CGFunctionInfo &CodeGenTypes::getFunctionInfo(CanQualType ResTy,
     return *FI;
 
   // Construct the function info.
-  FI = new CGFunctionInfo(CC, Info.getNoReturn(), Info.getRegParm(), ResTy, ArgTys);
+  FI = new CGFunctionInfo(CC, Info.getNoReturn(), Info.getRegParm(), ResTy,
+                          ArgTys);
   FunctionInfos.InsertNode(FI, InsertPos);
 
   // Compute ABI information.
@@ -259,6 +260,8 @@ CGFunctionInfo::CGFunctionInfo(unsigned _CallingConvention,
     NoReturn(_NoReturn), RegParm(_RegParm)
 {
   NumArgs = ArgTys.size();
+
+  // FIXME: Coallocate with the CGFunctionInfo object.
   Args = new ArgInfo[1 + NumArgs];
   Args[0].type = ResTy;
   for (unsigned i = 0; i < NumArgs; ++i)
@@ -593,9 +596,19 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI, bool IsVariadic) {
   case ABIArgInfo::Ignore:
     break;
 
-  case ABIArgInfo::Coerce:
-    ArgTys.push_back(AI.getCoerceToType());
+  case ABIArgInfo::Coerce: {
+    // If the coerce-to type is a first class aggregate, flatten it.  Either
+    // way is semantically identical, but fast-isel and the optimizer
+    // generally likes scalar values better than FCAs.
+    const llvm::Type *ArgTy = AI.getCoerceToType();
+    if (const llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgTy)) {
+      for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+        ArgTys.push_back(STy->getElementType(i));
+    } else {
+      ArgTys.push_back(ArgTy);
+    }
     break;
+  }
 
   case ABIArgInfo::Indirect: {
     // indirect arguments are always on the stack, which is addr space #0.
@@ -713,7 +726,12 @@ void CodeGenModule::ConstructAttributeList(const CGFunctionInfo &FI,
 
     switch (AI.getKind()) {
     case ABIArgInfo::Coerce:
-      break;
+      if (const llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(AI.getCoerceToType()))
+        Index += STy->getNumElements();
+      else
+        ++Index;
+      continue;  // Skip index increment.
 
     case ABIArgInfo::Indirect:
       if (AI.getIndirectByVal())
@@ -806,7 +824,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
 
     switch (ArgI.getKind()) {
     case ABIArgInfo::Indirect: {
-      llvm::Value* V = AI;
+      llvm::Value *V = AI;
       if (hasAggregateLLVMType(Ty)) {
         // Do nothing, aggregates and complex variables are accessed by
         // reference.
@@ -826,7 +844,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
     case ABIArgInfo::Extend:
     case ABIArgInfo::Direct: {
       assert(AI != Fn->arg_end() && "Argument mismatch!");
-      llvm::Value* V = AI;
+      llvm::Value *V = AI;
       if (hasAggregateLLVMType(Ty)) {
         // Create a temporary alloca to hold the argument; the rest of
         // codegen expects to access aggregates & complex values by
@@ -876,12 +894,29 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
       continue;
 
     case ABIArgInfo::Coerce: {
-      assert(AI != Fn->arg_end() && "Argument mismatch!");
+      // If the coerce-to type is a first class aggregate, we flatten it and
+      // pass the elements.  Either way is semantically identical, but fast-isel
+      // and the optimizer generally likes scalar values better than FCAs.
+      llvm::Value *FormalArg;
+      if (const llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) {
+        // Reconstruct the FCA here.
+        // FIXME: If we have a direct match, do nice gep/store series.
+        FormalArg = llvm::UndefValue::get(STy);
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+          assert(AI != Fn->arg_end() && "Argument mismatch!");
+          FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i);
+        }
+      } else {
+        assert(AI != Fn->arg_end() && "Argument mismatch!");
+        FormalArg = AI++;
+      }
+
       // FIXME: This is very wasteful; EmitParmDecl is just going to drop the
       // result in a new alloca anyway, so we could just store into that
       // directly if we broke the abstraction down more.
       llvm::Value *V = CreateMemTemp(Ty, "coerce");
-      CreateCoercedStore(AI, V, /*DestIsVolatile=*/false, *this);
+      CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this);
       // Match to what EmitParmDecl is expecting for this type.
       if (!CodeGenFunction::hasAggregateLLVMType(Ty)) {
         V = EmitLoadOfScalar(V, false, Ty);
@@ -892,7 +927,7 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
         }
       }
       EmitParmDecl(*Arg, V);
-      break;
+      continue;  // Skip ++AI increment, already done.
     }
     }
 
@@ -1080,8 +1115,22 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         StoreComplexToAddr(RV.getComplexVal(), SrcPtr, false);
       } else
         SrcPtr = RV.getAggregateAddr();
-      Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
-                                       *this));
+
+      llvm::Value *SrcVal =
+        CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
+
+      // If the coerce-to type is a first class aggregate, we flatten it and
+      // pass the elements.  Either way is semantically identical, but fast-isel
+      // and the optimizer generally likes scalar values better than FCAs.
+      if (const llvm::StructType *STy =
+            dyn_cast<llvm::StructType>(SrcVal->getType())) {
+        // Extract the elements of the value to pass in.
+        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+          Args.push_back(Builder.CreateExtractValue(SrcVal, i));
+      } else {
+        Args.push_back(SrcVal);
+      }
+
       break;
     }
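The mirror image on the callee side, rebuilding the FCA from the flattened scalar parameters before it is handed to CreateCoercedStore, looks roughly like the sketch below. Again this is a hedged illustration rather than the committed code: rebuildCoercedFCA is a made-up helper name with an invented signature, and modern include paths are assumed; in the patch the equivalent logic lives inline in CodeGenFunction::EmitFunctionProlog.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"

// Hypothetical helper, invented for illustration.  Rebuild the coerced
// first-class aggregate from the flattened scalar parameters, consuming one
// incoming function argument per struct element, and return the reassembled
// value so it can be stored like the old single-value path did.
static llvm::Value *rebuildCoercedFCA(llvm::IRBuilder<> &Builder,
                                      llvm::StructType *STy,
                                      llvm::Function::arg_iterator &AI) {
  // Start from undef and insertvalue each incoming scalar into its slot.
  llvm::Value *FCA = llvm::UndefValue::get(STy);
  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
    FCA = Builder.CreateInsertValue(FCA, &*AI++, i);
  return FCA;
}

In the patch itself the reassembled value is then stored into the "coerce" temporary created by CreateMemTemp, exactly where the old code stored the single coerced argument.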