path: root/lib/CodeGen/CGCall.cpp
author     Chris Lattner <sabre@nondot.org>   2010-06-29 00:06:42 +0000
committer  Chris Lattner <sabre@nondot.org>   2010-06-29 00:06:42 +0000
commit     309c59f6d3a4fd883fdf87334271df2c55338aae (patch)
tree       3da3e772937453a52cb9695d195ea89c14dd63bd /lib/CodeGen/CGCall.cpp
parent     4d072932287eb074a4168804cac1acb18a51d5e8 (diff)
make the argument passing stuff in the FCA case smarter still, by
avoiding making the FCA at all when the types exactly line up. For example, before we made:

%struct.DeclGroup = type { i64, i64 }

define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8              ; <%struct.DeclGroup*> [#uses=3]
  %2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
  %3 = insertvalue %struct.DeclGroup %2, i64 %1, 1    ; <%struct.DeclGroup> [#uses=1]
  store %struct.DeclGroup %3, %struct.DeclGroup* %D
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp                              ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
  %tmp3 = load i64* %tmp2                             ; <i64> [#uses=1]
  %add = add nsw i64 %tmp1, %tmp3                     ; <i64> [#uses=1]
  ret i64 %add
}

... which has the pointless insertvalue, which fastisel hates, now we make:

%struct.DeclGroup = type { i64, i64 }

define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
  %D = alloca %struct.DeclGroup, align 8              ; <%struct.DeclGroup*> [#uses=4]
  %2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  store i64 %0, i64* %2
  %3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
  store i64 %1, i64* %3
  %tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
  %tmp1 = load i64* %tmp                              ; <i64> [#uses=1]
  %tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
  %tmp3 = load i64* %tmp2                             ; <i64> [#uses=1]
  %add = add nsw i64 %tmp1, %tmp3                     ; <i64> [#uses=1]
  ret i64 %add
}

This only kicks in when x86-64 ABI lowering decides it likes us.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@107104 91177308-0d34-0410-b5e6-96231b3b80d8
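For context, a C++ test case along the following lines would exercise this path under the x86-64 ABI. This is only a sketch: the member types of DeclGroup and the test source are assumptions inferred from the IR above (a { i64, i64 } coercion and a function mangled as _Z3foo9DeclGroup), not part of this commit.

struct DeclGroup {
  long long A, B;              // two 64-bit members, coerced to { i64, i64 } by the ABI
};

long long foo(DeclGroup D) {   // mangles to _Z3foo9DeclGroup
  return D.A + D.B;            // the two field loads and the add shown in the IR
}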
Diffstat (limited to 'lib/CodeGen/CGCall.cpp')
-rw-r--r--  lib/CodeGen/CGCall.cpp  |  67
1 file changed, 46 insertions(+), 21 deletions(-)
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index eb517edd81..4d72d91cb7 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -894,29 +894,41 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
continue;
case ABIArgInfo::Coerce: {
+ // FIXME: This is very wasteful; EmitParmDecl is just going to drop the
+ // result in a new alloca anyway, so we could just store into that
+ // directly if we broke the abstraction down more.
+ llvm::Value *V = CreateMemTemp(Ty, "coerce");
+
// If the coerce-to type is a first class aggregate, we flatten it and
// pass the elements. Either way is semantically identical, but fast-isel
// and the optimizer generally likes scalar values better than FCAs.
- llvm::Value *FormalArg;
if (const llvm::StructType *STy =
dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) {
- // Reconstruct the FCA here.
- // FIXME: If we have a direct match, do nice gep/store series.
- FormalArg = llvm::UndefValue::get(STy);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- assert(AI != Fn->arg_end() && "Argument mismatch!");
- FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i);
+ // If the argument and alloca types match up, we don't have to build the
+ // FCA at all; instead we emit a series of GEPs and stores, which is better
+ // for fast isel.
+ if (STy == cast<llvm::PointerType>(V->getType())->getElementType()) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ assert(AI != Fn->arg_end() && "Argument mismatch!");
+ llvm::Value *EltPtr = Builder.CreateConstGEP2_32(V, 0, i);
+ Builder.CreateStore(AI++, EltPtr);
+ }
+ } else {
+ // Reconstruct the FCA here so we can do a coerced store.
+ llvm::Value *FormalArg = llvm::UndefValue::get(STy);
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ assert(AI != Fn->arg_end() && "Argument mismatch!");
+ FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i);
+ }
+ CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this);
}
} else {
+ // Simple case, just do a coerced store of the argument into the alloca.
assert(AI != Fn->arg_end() && "Argument mismatch!");
- FormalArg = AI++;
+ CreateCoercedStore(AI++, V, /*DestIsVolatile=*/false, *this);
}
- // FIXME: This is very wasteful; EmitParmDecl is just going to drop the
- // result in a new alloca anyway, so we could just store into that
- // directly if we broke the abstraction down more.
- llvm::Value *V = CreateMemTemp(Ty, "coerce");
- CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this);
+
// Match to what EmitParmDecl is expecting for this type.
if (!CodeGenFunction::hasAggregateLLVMType(Ty)) {
V = EmitLoadOfScalar(V, false, Ty);
@@ -1116,19 +1128,32 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
} else
SrcPtr = RV.getAggregateAddr();
- llvm::Value *SrcVal =
- CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
-
// If the coerce-to type is a first class aggregate, we flatten it and
// pass the elements. Either way is semantically identical, but fast-isel
// and the optimizer generally likes scalar values better than FCAs.
if (const llvm::StructType *STy =
- dyn_cast<llvm::StructType>(SrcVal->getType())) {
- // Extract the elements of the value to pass in.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
- Args.push_back(Builder.CreateExtractValue(SrcVal, i));
+ dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
+ // If the argument and alloca types match up, we don't have to build the
+ // FCA at all; instead we emit a series of GEPs and loads, which is better
+ // for fast isel.
+ if (STy == cast<llvm::PointerType>(SrcPtr->getType())->getElementType()) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ llvm::Value *EltPtr = Builder.CreateConstGEP2_32(SrcPtr, 0, i);
+ Args.push_back(Builder.CreateLoad(EltPtr));
+ }
+ } else {
+ // Otherwise, do a coerced load of the entire FCA and handle the pieces.
+ llvm::Value *SrcVal =
+ CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
+
+ // Extract the elements of the value to pass in.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
+ Args.push_back(Builder.CreateExtractValue(SrcVal, i));
+ }
} else {
- Args.push_back(SrcVal);
+ // In the simple case, just pass the coerced loaded value.
+ Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
+ *this));
}
break;
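The call-site hunk mirrors the prolog change: when the coerce-to struct type matches the element type of the source pointer, the argument is passed as a series of GEP+load pairs rather than a CreateCoercedLoad followed by extractvalue instructions. A hypothetical caller that would take this path, assuming the same DeclGroup sketch given after the commit message above:

long long callit() {
  DeclGroup D = {1, 2};
  return foo(D);   // with this patch, lowered to a call of @_Z3foo9DeclGroup(i64, i64)
                   // whose operands come from getelementptr+load of D's two fields
}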