diff options
author | Benjamin Kramer <benny.kra@googlemail.com> | 2013-04-13 12:53:18 +0000 |
---|---|---|
committer | Benjamin Kramer <benny.kra@googlemail.com> | 2013-04-13 12:53:18 +0000 |
commit | 8848680ce0ab416cb646d0a03aa6f4f6f25e7623 (patch) | |
tree | 8cb969d787c126e7e54de60aa90c7977169fcffc | |
parent | b99c995825a49f0da5af40ee1b61269deb8994b5 (diff) |
Fix a scalability issue with complex ConstantExprs.
This is basically the same fix in three different places. We use a set to avoid
walking the whole tree of a big ConstantExprs multiple times.
For example: (select cmp, (add big_expr 1), (add big_expr 2))
We don't want to visit big_expr twice here, it may consist of thousands of
nodes.
The testcase exercises this by creating an insanely large ConstantExprs out of
a loop. It's questionable if the optimizer should ever create those, but this
can be triggered with real C code. Fixes PR15714.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179458 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Analysis/ConstantFolding.cpp | 36 | ||||
-rw-r--r-- | lib/IR/Constants.cpp | 28 | ||||
-rw-r--r-- | lib/Transforms/IPO/GlobalDCE.cpp | 13 | ||||
-rw-r--r-- | test/Transforms/GlobalDCE/complex-constantexpr.ll | 97 |
4 files changed, 149 insertions, 25 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 09d7608c51..b5286e8f29 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" @@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } -/// ConstantFoldConstantExpression - Attempt to fold the constant expression -/// using the specified DataLayout. If successful, the constant result is -/// result is returned, if not, null is returned. -Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, - const DataLayout *TD, - const TargetLibraryInfo *TLI) { - SmallVector<Constant*, 8> Ops; - for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); - i != e; ++i) { +static Constant * +ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD, + const TargetLibraryInfo *TLI, + SmallPtrSet<ConstantExpr *, 4> &FoldedOps) { + SmallVector<Constant *, 8> Ops; + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; + ++i) { Constant *NewC = cast<Constant>(*i); - // Recursively fold the ConstantExpr's operands. - if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) - NewC = ConstantFoldConstantExpression(NewCE, TD, TLI); + // Recursively fold the ConstantExpr's operands. If we have already folded + // a ConstantExpr, we don't have to process it again. + if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) { + if (FoldedOps.insert(NewCE)) + NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps); + } Ops.push_back(NewC); } @@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI); } +/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// using the specified DataLayout. If successful, the constant result is +/// result is returned, if not, null is returned. +Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, + const DataLayout *TD, + const TargetLibraryInfo *TLI) { + SmallPtrSet<ConstantExpr *, 4> FoldedOps; + return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps); +} + /// ConstantFoldInstOperands - Attempt to constant fold an instruction with the /// specified opcode and operands. If successful, the constant result is /// returned, if not, null is returned. Note that this function can fail when diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 1abb656435..2c6971c83e 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() { delete this; } -/// canTrap - Return true if evaluation of this constant could trap. This is -/// true for things like constant expressions that could divide by zero. -bool Constant::canTrap() const { - assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); +static bool canTrapImpl(const Constant *C, + SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) { + assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!"); // The only thing that could possibly trap are constant exprs. - const ConstantExpr *CE = dyn_cast<ConstantExpr>(this); - if (!CE) return false; + const ConstantExpr *CE = dyn_cast<ConstantExpr>(C); + if (!CE) + return false; // ConstantExpr traps if any operands can trap. - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (CE->getOperand(i)->canTrap()) - return true; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) { + if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps)) + return true; + } + } // Otherwise, only specific operations can trap. switch (CE->getOpcode()) { @@ -267,6 +270,13 @@ bool Constant::canTrap() const { } } +/// canTrap - Return true if evaluation of this constant could trap. This is +/// true for things like constant expressions that could divide by zero. +bool Constant::canTrap() const { + SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps; + return canTrapImpl(this, NonTrappingOps); +} + /// isThreadDependent - Return true if the value can vary between threads. bool Constant::isThreadDependent() const { SmallPtrSet<const Constant*, 64> Visited; diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index dc99492990..76d1287fb7 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -42,6 +42,7 @@ namespace { private: SmallPtrSet<GlobalValue*, 32> AliveGlobals; + SmallPtrSet<Constant *, 8> SeenConstants; /// GlobalIsNeeded - mark the specific global value as needed, and /// recursively mark anything that it uses as also needed. @@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) { // Make sure that all memory is released AliveGlobals.clear(); + SeenConstants.clear(); return Changed; } @@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { if (GlobalValue *GV = dyn_cast<GlobalValue>(C)) return GlobalIsNeeded(GV); - + // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) - if (Constant *OpC = dyn_cast<Constant>(*I)) - MarkUsedGlobalsAsNeeded(OpC); + for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + // If we've already processed this constant there's no need to do it again. + Constant *Op = cast<Constant>(*I); + if (SeenConstants.insert(Op)) + MarkUsedGlobalsAsNeeded(Op); + } } // RemoveUnusedGlobalValue - Loop over all of the uses of the specified diff --git a/test/Transforms/GlobalDCE/complex-constantexpr.ll b/test/Transforms/GlobalDCE/complex-constantexpr.ll new file mode 100644 index 0000000000..4bf1aeee70 --- /dev/null +++ b/test/Transforms/GlobalDCE/complex-constantexpr.ll @@ -0,0 +1,97 @@ +; RUN: opt -O2 -disable-output < %s +; PR15714 + +%struct.ham = type { i32 } + +@global5 = common global i32 0, align 4 +@global6 = common global i32 0, align 4 +@global7 = common global i32 0, align 4 +@global = common global i32 0, align 4 +@global8 = common global %struct.ham zeroinitializer, align 4 +@global9 = common global i32 0, align 4 +@global10 = common global i32 0, align 4 +@global11 = common global i32 0, align 4 + +define void @zot12() { +bb: + store i32 0, i32* @global5, align 4 + store i32 0, i32* @global6, align 4 + br label %bb2 + +bb1: ; preds = %bb11 + %tmp = load i32* @global5, align 4 + br label %bb2 + +bb2: ; preds = %bb1, %bb + %tmp3 = phi i32 [ %tmp, %bb1 ], [ 0, %bb ] + %tmp4 = xor i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32) + store i32 %tmp4, i32* @global5, align 4 + %tmp5 = icmp eq i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32) + br i1 %tmp5, label %bb8, label %bb6 + +bb6: ; preds = %bb2 + %tmp7 = tail call i32 @quux13() + br label %bb8 + +bb8: ; preds = %bb6, %bb2 + %tmp9 = load i32* @global7, align 4 + %tmp10 = icmp eq i32 %tmp9, 0 + br i1 %tmp10, label %bb11, label %bb15 + +bb11: ; preds = %bb8 + %tmp12 = load i32* @global6, align 4 + %tmp13 = add nsw i32 %tmp12, 1 + store i32 %tmp13, i32* @global6, align 4 + %tmp14 = icmp slt i32 %tmp13, 42 + br i1 %tmp14, label %bb1, label %bb15 + +bb15: ; preds = %bb11, %bb8 + ret void +} + +define i32 @quux13() { +bb: + store i32 1, i32* @global5, align 4 + ret i32 1 +} + +define void @wombat() { +bb: + tail call void @zot12() + ret void +} + +define void @wombat14() { +bb: + tail call void @blam() + ret void +} + +define void @blam() { +bb: + store i32 ptrtoint (i32* @global to i32), i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4 + store i32 0, i32* @global9, align 4 + %tmp = load i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4 + br label %bb1 + +bb1: ; preds = %bb1, %bb + %tmp2 = phi i32 [ 0, %bb ], [ %tmp11, %bb1 ] + %tmp3 = phi i32 [ %tmp, %bb ], [ %tmp10, %bb1 ] + %tmp4 = icmp sgt i32 %tmp3, 0 + %tmp5 = zext i1 %tmp4 to i32 + %tmp6 = urem i32 %tmp5, 5 + %tmp7 = mul i32 %tmp3, -80 + %tmp8 = or i32 %tmp7, %tmp6 + %tmp9 = icmp eq i32 %tmp8, 0 + %tmp10 = zext i1 %tmp9 to i32 + %tmp11 = add nsw i32 %tmp2, 1 + %tmp12 = icmp eq i32 %tmp11, 20 + br i1 %tmp12, label %bb13, label %bb1 + +bb13: ; preds = %bb1 + store i32 %tmp10, i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4 + store i32 0, i32* @global10, align 4 + store i32 %tmp6, i32* @global11, align 4 + store i32 20, i32* @global9, align 4 + ret void +} |