aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2010-12-02 07:07:26 +0000
committerChris Lattner <sabre@nondot.org>2010-12-02 07:07:26 +0000
commit1b726771d00762fb5c4c2638e60d134c385493ae (patch)
tree9755162337f4b274f82dda181b76c11b64e1bf58
parentc0f31fd08537b65ad92db8ce860747e3402a07e6 (diff)
Improve codegen for initializer lists to use memset more aggressively
when an initializer is variable (I handled the constant case in a previous patch). This has three pieces: 1. Enhance AggValueSlot to have a 'isZeroed' bit to tell CGExprAgg that the memory being stored into has previously been memset to zero. 2. Teach CGExprAgg to not emit stores of zero to isZeroed memory. 3. Teach CodeGenFunction::EmitAggExpr to scan initializers to determine whether they are profitable to emit a memset + inividual stores vs stores for everything. The heuristic used is that a global has to be more than 16 bytes and has to be 3/4 zero to be candidate for this xform. The two testcases are illustrative of the scenarios this catches. We now codegen test9 into: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 400, i32 4, i1 false) %.array = getelementptr inbounds [100 x i32]* %Arr, i32 0, i32 0 %tmp = load i32* %X.addr, align 4 store i32 %tmp, i32* %.array and test10 into: call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 392, i32 8, i1 false) %tmp = getelementptr inbounds %struct.b* %S, i32 0, i32 0 %tmp1 = getelementptr inbounds %struct.a* %tmp, i32 0, i32 0 %tmp2 = load i32* %X.addr, align 4 store i32 %tmp2, i32* %tmp1, align 4 %tmp5 = getelementptr inbounds %struct.b* %S, i32 0, i32 3 %tmp10 = getelementptr inbounds %struct.a* %tmp5, i32 0, i32 4 %tmp11 = load i32* %X.addr, align 4 store i32 %tmp11, i32* %tmp10, align 4 Previously we produced 99 stores of zero for test9 and also tons for test10. This xforms should substantially speed up -O0 builds when it kicks in as well as reducing code size and optimizer heartburn on insane cases. This resolves PR279. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@120692 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/CodeGen/CGDecl.cpp5
-rw-r--r--lib/CodeGen/CGExprAgg.cpp152
-rw-r--r--lib/CodeGen/CGValue.h17
-rw-r--r--lib/CodeGen/CodeGenFunction.h3
-rw-r--r--test/CodeGen/init.c30
5 files changed, 187 insertions, 20 deletions
diff --git a/lib/CodeGen/CGDecl.cpp b/lib/CodeGen/CGDecl.cpp
index 3f87f9fbdc..4032bedbfc 100644
--- a/lib/CodeGen/CGDecl.cpp
+++ b/lib/CodeGen/CGDecl.cpp
@@ -696,8 +696,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D,
// Get the element type.
const llvm::Type *LElemTy = ConvertTypeForMem(Ty);
- const llvm::Type *LElemPtrTy =
- llvm::PointerType::get(LElemTy, Ty.getAddressSpace());
+ const llvm::Type *LElemPtrTy = LElemTy->getPointerTo(Ty.getAddressSpace());
llvm::Value *VLASize = EmitVLASize(Ty);
@@ -860,7 +859,7 @@ void CodeGenFunction::EmitAutoVarDecl(const VarDecl &D,
} else if (Init->getType()->isAnyComplexType()) {
EmitComplexExprIntoAddr(Init, Loc, isVolatile);
} else {
- EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true));
+ EmitAggExpr(Init, AggValueSlot::forAddr(Loc, isVolatile, true, false));
}
}
diff --git a/lib/CodeGen/CGExprAgg.cpp b/lib/CodeGen/CGExprAgg.cpp
index e19baa97f1..ab694e3112 100644
--- a/lib/CodeGen/CGExprAgg.cpp
+++ b/lib/CodeGen/CGExprAgg.cpp
@@ -497,11 +497,42 @@ void AggExprEmitter::VisitImplicitValueInitExpr(ImplicitValueInitExpr *E) {
EmitNullInitializationToLValue(CGF.MakeAddrLValue(Slot.getAddr(), T), T);
}
+/// isSimpleZero - If emitting this value will obviously just cause a store of
+/// zero to memory, return true. This can return false if uncertain, so it just
+/// handles simple cases.
+static bool isSimpleZero(const Expr *E, CodeGenFunction &CGF) {
+ // (0)
+ if (const ParenExpr *PE = dyn_cast<ParenExpr>(E))
+ return isSimpleZero(PE->getSubExpr(), CGF);
+ // 0
+ if (const IntegerLiteral *IL = dyn_cast<IntegerLiteral>(E))
+ return IL->getValue() == 0;
+ // +0.0
+ if (const FloatingLiteral *FL = dyn_cast<FloatingLiteral>(E))
+ return FL->getValue().isPosZero();
+ // int()
+ if ((isa<ImplicitValueInitExpr>(E) || isa<CXXScalarValueInitExpr>(E)) &&
+ CGF.getTypes().isZeroInitializable(E->getType()))
+ return true;
+ // (int*)0 - Null pointer expressions.
+ if (const CastExpr *ICE = dyn_cast<CastExpr>(E))
+ return ICE->getCastKind() == CK_NullToPointer;
+ // '\0'
+ if (const CharacterLiteral *CL = dyn_cast<CharacterLiteral>(E))
+ return CL->getValue() == 0;
+
+ // Otherwise, hard case: conservatively return false.
+ return false;
+}
+
+
void
AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) {
// FIXME: Ignore result?
// FIXME: Are initializers affected by volatile?
- if (isa<ImplicitValueInitExpr>(E)) {
+ if (Dest.isZeroed() && isSimpleZero(E, CGF)) {
+ // Storing "i32 0" to a zero'd memory location is a noop.
+ } else if (isa<ImplicitValueInitExpr>(E)) {
EmitNullInitializationToLValue(LV, T);
} else if (T->isReferenceType()) {
RValue RV = CGF.EmitReferenceBindingToExpr(E, /*InitializedDecl=*/0);
@@ -509,13 +540,19 @@ AggExprEmitter::EmitInitializationToLValue(Expr* E, LValue LV, QualType T) {
} else if (T->isAnyComplexType()) {
CGF.EmitComplexExprIntoAddr(E, LV.getAddress(), false);
} else if (CGF.hasAggregateLLVMType(T)) {
- CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true));
+ CGF.EmitAggExpr(E, AggValueSlot::forAddr(LV.getAddress(), false, true,
+ false, Dest.isZeroed()));
} else {
CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(E), LV, T);
}
}
void AggExprEmitter::EmitNullInitializationToLValue(LValue LV, QualType T) {
+ // If the destination slot is already zeroed out before the aggregate is
+ // copied into it, we don't have to emit any zeros here.
+ if (Dest.isZeroed() && CGF.getTypes().isZeroInitializable(T))
+ return;
+
if (!CGF.hasAggregateLLVMType(T)) {
// For non-aggregates, we can store zero
llvm::Value *Null = llvm::Constant::getNullValue(CGF.ConvertType(T));
@@ -573,13 +610,27 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// FIXME: were we intentionally ignoring address spaces and GC attributes?
for (uint64_t i = 0; i != NumArrayElements; ++i) {
+ // If we're done emitting initializers and the destination is known-zeroed
+ // then we're done.
+ if (i == NumInitElements &&
+ Dest.isZeroed() &&
+ CGF.getTypes().isZeroInitializable(ElementType))
+ break;
+
llvm::Value *NextVal = Builder.CreateStructGEP(DestPtr, i, ".array");
LValue LV = CGF.MakeAddrLValue(NextVal, ElementType);
+
if (i < NumInitElements)
EmitInitializationToLValue(E->getInit(i), LV, ElementType);
-
else
EmitNullInitializationToLValue(LV, ElementType);
+
+ // If the GEP didn't get used because of a dead zero init or something
+ // else, clean it up for -O0 builds and general tidiness.
+ if (llvm::GetElementPtrInst *GEP =
+ dyn_cast<llvm::GetElementPtrInst>(NextVal))
+ if (GEP->use_empty())
+ GEP->eraseFromParent();
}
return;
}
@@ -612,13 +663,13 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// FIXME: volatility
FieldDecl *Field = E->getInitializedFieldInUnion();
- LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0);
+ LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, Field, 0);
if (NumInitElements) {
// Store the initializer into the field
EmitInitializationToLValue(E->getInit(0), FieldLoc, Field->getType());
} else {
- // Default-initialize to null
+ // Default-initialize to null.
EmitNullInitializationToLValue(FieldLoc, Field->getType());
}
@@ -638,10 +689,16 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
if (Field->isUnnamedBitfield())
continue;
+ // Don't emit GEP before a noop store of zero.
+ if (CurInitVal == NumInitElements && Dest.isZeroed() &&
+ CGF.getTypes().isZeroInitializable(E->getType()))
+ break;
+
// FIXME: volatility
LValue FieldLoc = CGF.EmitLValueForFieldInitialization(DestPtr, *Field, 0);
// We never generate write-barries for initialized fields.
FieldLoc.setNonGC(true);
+
if (CurInitVal < NumInitElements) {
// Store the initializer into the field.
EmitInitializationToLValue(E->getInit(CurInitVal++), FieldLoc,
@@ -650,6 +707,14 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// We're out of initalizers; default-initialize to null
EmitNullInitializationToLValue(FieldLoc, Field->getType());
}
+
+ // If the GEP didn't get used because of a dead zero init or something
+ // else, clean it up for -O0 builds and general tidiness.
+ if (FieldLoc.isSimple())
+ if (llvm::GetElementPtrInst *GEP =
+ dyn_cast<llvm::GetElementPtrInst>(FieldLoc.getAddress()))
+ if (GEP->use_empty())
+ GEP->eraseFromParent();
}
}
@@ -657,6 +722,69 @@ void AggExprEmitter::VisitInitListExpr(InitListExpr *E) {
// Entry Points into this File
//===----------------------------------------------------------------------===//
+/// GetNumNonZeroBytesInInit - Get an approximate count of the number of
+/// non-zero bytes that will be stored when outputting the initializer for the
+/// specified initializer expression.
+static uint64_t GetNumNonZeroBytesInInit(const Expr *E, CodeGenFunction &CGF) {
+ if (const ParenExpr *PE = dyn_cast<ParenExpr>(E))
+ return GetNumNonZeroBytesInInit(PE->getSubExpr(), CGF);
+
+ // 0 and 0.0 won't require any non-zero stores!
+ if (isSimpleZero(E, CGF)) return 0;
+
+ // If this is an initlist expr, sum up the size of sizes of the (present)
+ // elements. If this is something weird, assume the whole thing is non-zero.
+ const InitListExpr *ILE = dyn_cast<InitListExpr>(E);
+ if (ILE == 0 || !CGF.getTypes().isZeroInitializable(ILE->getType()))
+ return CGF.getContext().getTypeSize(E->getType())/8;
+
+ uint64_t NumNonZeroBytes = 0;
+ for (unsigned i = 0, e = ILE->getNumInits(); i != e; ++i)
+ NumNonZeroBytes += GetNumNonZeroBytesInInit(ILE->getInit(i), CGF);
+ return NumNonZeroBytes;
+}
+
+/// CheckAggExprForMemSetUse - If the initializer is large and has a lot of
+/// zeros in it, emit a memset and avoid storing the individual zeros.
+///
+static void CheckAggExprForMemSetUse(AggValueSlot &Slot, const Expr *E,
+ CodeGenFunction &CGF) {
+ // If the slot is already known to be zeroed, nothing to do. Don't mess with
+ // volatile stores.
+ if (Slot.isZeroed() || Slot.isVolatile() || Slot.getAddr() == 0) return;
+
+ // If the type is 16-bytes or smaller, prefer individual stores over memset.
+ std::pair<uint64_t, unsigned> TypeInfo =
+ CGF.getContext().getTypeInfo(E->getType());
+ if (TypeInfo.first/8 <= 16)
+ return;
+
+ // Check to see if over 3/4 of the initializer are known to be zero. If so,
+ // we prefer to emit memset + individual stores for the rest.
+ uint64_t NumNonZeroBytes = GetNumNonZeroBytesInInit(E, CGF);
+ if (NumNonZeroBytes*4 > TypeInfo.first/8)
+ return;
+
+ // Okay, it seems like a good idea to use an initial memset, emit the call.
+ llvm::Constant *SizeVal = CGF.Builder.getInt64(TypeInfo.first/8);
+ llvm::ConstantInt *AlignVal = CGF.Builder.getInt32(TypeInfo.second/8);
+
+ llvm::Value *Loc = Slot.getAddr();
+ const llvm::Type *BP = llvm::Type::getInt8PtrTy(CGF.getLLVMContext());
+
+ Loc = CGF.Builder.CreateBitCast(Loc, BP);
+ CGF.Builder.CreateCall5(CGF.CGM.getMemSetFn(Loc->getType(),
+ SizeVal->getType()),
+ Loc, CGF.Builder.getInt8(0), SizeVal, AlignVal,
+ CGF.Builder.getFalse());
+
+ // Tell the AggExprEmitter that the slot is known zero.
+ Slot.setZeroed();
+}
+
+
+
+
/// EmitAggExpr - Emit the computation of the specified expression of aggregate
/// type. The result is computed into DestPtr. Note that if DestPtr is null,
/// the value of the aggregate expression is not needed. If VolatileDest is
@@ -670,20 +798,20 @@ void CodeGenFunction::EmitAggExpr(const Expr *E, AggValueSlot Slot,
bool IgnoreResult) {
assert(E && hasAggregateLLVMType(E->getType()) &&
"Invalid aggregate expression to emit");
- assert((Slot.getAddr() != 0 || Slot.isIgnored())
- && "slot has bits but no address");
+ assert((Slot.getAddr() != 0 || Slot.isIgnored()) &&
+ "slot has bits but no address");
- AggExprEmitter(*this, Slot, IgnoreResult)
- .Visit(const_cast<Expr*>(E));
+ // Optimize the slot if possible.
+ CheckAggExprForMemSetUse(Slot, E, *this);
+
+ AggExprEmitter(*this, Slot, IgnoreResult).Visit(const_cast<Expr*>(E));
}
LValue CodeGenFunction::EmitAggExprToLValue(const Expr *E) {
assert(hasAggregateLLVMType(E->getType()) && "Invalid argument!");
llvm::Value *Temp = CreateMemTemp(E->getType());
LValue LV = MakeAddrLValue(Temp, E->getType());
- AggValueSlot Slot
- = AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false);
- EmitAggExpr(E, Slot);
+ EmitAggExpr(E, AggValueSlot::forAddr(Temp, LV.isVolatileQualified(), false));
return LV;
}
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index 2de5750084..b7bb9d76a4 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -336,6 +336,11 @@ class AggValueSlot {
bool VolatileFlag : 1;
bool LifetimeFlag : 1;
bool RequiresGCollection : 1;
+
+ /// IsZeroed - This is set to true if the destination is known to be zero
+ /// before the assignment into it. This means that zero fields don't need to
+ /// be set.
+ bool IsZeroed : 1;
public:
/// ignored - Returns an aggregate value slot indicating that the
@@ -343,7 +348,7 @@ public:
static AggValueSlot ignored() {
AggValueSlot AV;
AV.Addr = 0;
- AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = 0;
+ AV.VolatileFlag = AV.LifetimeFlag = AV.RequiresGCollection = AV.IsZeroed =0;
return AV;
}
@@ -357,17 +362,19 @@ public:
/// somewhere that ObjC GC calls should be emitted for
static AggValueSlot forAddr(llvm::Value *Addr, bool Volatile,
bool LifetimeExternallyManaged,
- bool RequiresGCollection=false) {
+ bool RequiresGCollection = false,
+ bool IsZeroed = false) {
AggValueSlot AV;
AV.Addr = Addr;
AV.VolatileFlag = Volatile;
AV.LifetimeFlag = LifetimeExternallyManaged;
AV.RequiresGCollection = RequiresGCollection;
+ AV.IsZeroed = IsZeroed;
return AV;
}
static AggValueSlot forLValue(LValue LV, bool LifetimeExternallyManaged,
- bool RequiresGCollection=false) {
+ bool RequiresGCollection = false) {
return forAddr(LV.getAddress(), LV.isVolatileQualified(),
LifetimeExternallyManaged, RequiresGCollection);
}
@@ -399,6 +406,10 @@ public:
return RValue::getAggregate(getAddr(), isVolatile());
}
+ void setZeroed(bool V = true) { IsZeroed = V; }
+ bool isZeroed() const {
+ return IsZeroed;
+ }
};
} // end namespace CodeGen
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index 4a58d5b357..616c227ffb 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -1662,8 +1662,7 @@ public:
const BlockDeclRefExpr *BDRE);
RValue EmitCXXExprWithTemporaries(const CXXExprWithTemporaries *E,
- AggValueSlot Slot
- = AggValueSlot::ignored());
+ AggValueSlot Slot =AggValueSlot::ignored());
void EmitCXXThrowExpr(const CXXThrowExpr *E);
diff --git a/test/CodeGen/init.c b/test/CodeGen/init.c
index 2dee0432ef..ccb04f343c 100644
--- a/test/CodeGen/init.c
+++ b/test/CodeGen/init.c
@@ -70,3 +70,33 @@ char test8(int X) {
// CHECK: store i8 98
// CHECK: store i8 99
}
+
+void bar(void*);
+
+// PR279
+int test9(int X) {
+ int Arr[100] = { X }; // Should use memset
+ bar(Arr);
+// CHECK: @test9
+// CHECK: call void @llvm.memset
+// CHECK-NOT: store i32 0
+// CHECK: call void @bar
+}
+
+struct a {
+ int a, b, c, d, e, f, g, h, i, j, k, *p;
+};
+
+struct b {
+ struct a a,b,c,d,e,f,g;
+};
+
+int test10(int X) {
+ struct b S = { .a.a = X, .d.e = X, .f.e = 0, .f.f = 0, .f.p = 0 };
+ bar(&S);
+
+ // CHECK: @test10
+ // CHECK: call void @llvm.memset
+ // CHECK-NOT: store i32 0
+ // CHECK: call void @bar
+}