 lib/CodeGen/CGCall.cpp           | 49 ++++++++++++++++++++++++++++++++++++++++++++-----
 test/CodeGen/byval-memcpy-elim.c | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 5 deletions(-)
diff --git a/lib/CodeGen/CGCall.cpp b/lib/CodeGen/CGCall.cpp
index 712ae89a48..5e9ecd574b 100644
--- a/lib/CodeGen/CGCall.cpp
+++ b/lib/CodeGen/CGCall.cpp
@@ -1263,12 +1263,51 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
                             Alignment, I->Ty);
         else
           StoreComplexToAddr(RV.getComplexVal(), Args.back(), false);
-      } else if (I->NeedsCopy && !ArgInfo.getIndirectByVal()) {
-        Args.push_back(CreateMemTemp(I->Ty));
-        EmitAggregateCopy(Args.back(), RV.getAggregateAddr(), I->Ty,
-                          RV.isVolatileQualified());
       } else {
-        Args.push_back(RV.getAggregateAddr());
+        // We want to avoid creating an unnecessary temporary+copy here;
+        // however, we need one in two cases:
+        // 1. If the argument is not byval, and we are required to copy the
+        //    source. (This case doesn't occur on any common architecture.)
+        // 2. If the argument is byval, RV is not sufficiently aligned, and
+        //    we cannot force it to be sufficiently aligned.
+        // FIXME: This code is ugly because we don't know the required
+        // alignment when RV is generated.
+        llvm::AllocaInst *AI =
+            dyn_cast<llvm::AllocaInst>(RV.getAggregateAddr());
+        bool NeedsAggCopy = false;
+        if (I->NeedsCopy && !ArgInfo.getIndirectByVal())
+          NeedsAggCopy = true;
+        if (ArgInfo.getIndirectByVal()) {
+          if (AI) {
+            // The source is an alloca; we can force appropriate alignment.
+            if (ArgInfo.getIndirectAlign() > AI->getAlignment())
+              AI->setAlignment(ArgInfo.getIndirectAlign());
+          } else if (llvm::Argument *A =
+                         dyn_cast<llvm::Argument>(RV.getAggregateAddr())) {
+            // Check if the source is an appropriately aligned byval argument.
+            if (!A->hasByValAttr() ||
+                A->getParamAlignment() < ArgInfo.getIndirectAlign())
+              NeedsAggCopy = true;
+          } else {
+            // We don't know what the input is; force a temporary+copy if
+            // the type alignment is not sufficient.
+            assert(!I->NeedsCopy && "Temporary must be AllocaInst");
+            if (ArgInfo.getIndirectAlign() > Alignment)
+              NeedsAggCopy = true;
+          }
+        }
+        if (NeedsAggCopy) {
+          // Create an aligned temporary, and copy to it.
+          AI = CreateMemTemp(I->Ty);
+          if (ArgInfo.getIndirectAlign() > AI->getAlignment())
+            AI->setAlignment(ArgInfo.getIndirectAlign());
+          Args.push_back(AI);
+          EmitAggregateCopy(AI, RV.getAggregateAddr(), I->Ty,
+                            RV.isVolatileQualified());
+        } else {
+          // Skip the extra memcpy call.
+          Args.push_back(RV.getAggregateAddr());
+        }
       }
       break;
     }
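The comment block in the hunk above enumerates when a temporary+copy is still required. As a reading aid, here is a minimal standalone C++ sketch of that decision; the names (AggregateSource, needsAggCopy) and the simplified alignment model are hypothetical and are not the clang/LLVM API:

#include <cstdio>

// Where the aggregate being passed currently lives.
enum class SourceKind { Alloca, ByValArgument, Unknown };

struct AggregateSource {
  SourceKind Kind;
  unsigned Alignment; // current known alignment of the source memory
};

// Decide whether an aligned temporary+copy is needed before the call.
// RequiredAlign is the alignment the byval ABI demands; TypeAlign is the
// type's natural alignment, which is all that arbitrary memory guarantees.
bool needsAggCopy(AggregateSource &Src, unsigned RequiredAlign,
                  unsigned TypeAlign) {
  switch (Src.Kind) {
  case SourceKind::Alloca:
    // An alloca's alignment can simply be raised in place; no copy needed.
    if (RequiredAlign > Src.Alignment)
      Src.Alignment = RequiredAlign;
    return false;
  case SourceKind::ByValArgument:
    // A byval argument is already a caller-made copy; reuse it only if the
    // caller aligned it at least as strictly as the callee requires.
    return Src.Alignment < RequiredAlign;
  case SourceKind::Unknown:
    // Arbitrary memory: copy whenever the ABI wants more than the type's
    // natural alignment.
    return RequiredAlign > TypeAlign;
  }
  return true;
}

int main() {
  // Mirrors test2 in the test below: a source known only to the type's
  // 4-byte natural alignment, where the ABI wants 8, forces the copy.
  AggregateSource Unknown{SourceKind::Unknown, 4};
  std::printf("copy needed: %d\n", needsAggCopy(Unknown, 8, 4));
}

The asymmetry the patch exploits is that an alloca's alignment can be raised retroactively, while the alignment of an incoming pointer is fixed by the caller.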
diff --git a/test/CodeGen/byval-memcpy-elim.c b/test/CodeGen/byval-memcpy-elim.c
index 8aa08fb07b..76cdafb5e8 100644
--- a/test/CodeGen/byval-memcpy-elim.c
+++ b/test/CodeGen/byval-memcpy-elim.c
@@ -18,3 +18,36 @@ void test1a(struct Test1S, struct Test2S);
 void test1(struct Test1S *A, struct Test2S *B) {
   test1a(*A, *B);
 }
+
+// The above gets trickier when the byval argument requires higher alignment
+// than the natural alignment of the type in question.
+// rdar://9483886
+
+// Make sure we do generate a memcpy when we cannot guarantee alignment.
+struct Test3S {
+  int a,b,c,d,e,f,g,h,i,j,k,l;
+};
+void test2a(struct Test3S q);
+// CHECK: define void @test2(
+// CHECK: alloca %struct.Test3S, align 8
+// CHECK: memcpy
+// CHECK: call void @test2a
+void test2(struct Test3S *q) {
+  test2a(*q);
+}
+
+// But make sure we don't generate a memcpy when we can guarantee alignment.
+void fooey(void);
+// CHECK: define void @test3(
+// CHECK: alloca %struct.Test3S, align 8
+// CHECK: call void @fooey
+// CHECK-NOT: memcpy
+// CHECK: call void @test2a
+// CHECK-NOT: memcpy
+// CHECK: call void @test2a
+void test3(struct Test3S a) {
+  struct Test3S b = a;
+  fooey();
+  test2a(a);
+  test2a(b);
+}
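For context on why struct Test3S exercises the alignment path: its natural alignment is only that of int, while the byval convention the test's CHECK lines expect passes the aggregate with 8-byte alignment, so an arbitrary *q in test2 cannot be assumed sufficiently aligned. A small standalone C++ illustration follows; it is not part of the test file, and the printed values assume a typical x86-64 target:

#include <cstdio>

struct Test3S {
  int a, b, c, d, e, f, g, h, i, j, k, l;
};

int main() {
  // On common x86-64 targets this prints 48 and 4: the struct's natural
  // alignment (4) is below the 8 bytes the byval slot requires, which is
  // why test2 must emit a defensive memcpy, while test3's sources (a
  // suitably aligned byval argument and a realignable alloca) need none.
  std::printf("sizeof(Test3S)  = %zu\n", sizeof(Test3S));
  std::printf("alignof(Test3S) = %zu\n", alignof(Test3S));
}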