author    Chandler Carruth <chandlerc@gmail.com>  2012-03-25 04:03:40 +0000
committer Chandler Carruth <chandlerc@gmail.com>  2012-03-25 04:03:40 +0000
commit    d54f9a4c3bcdb247ea4aa311251c19242b03be63
tree      b8a2820b65d1697636eb7ea8ac4505ca943aa1cf
parent    acdae3e25a03e4e08039cb18f50b7788f71c0b2e
Move the instruction simplification of callsite arguments in the inliner
to instead rely on much more generic and powerful instruction
simplification in the function cloner (and thus in the inliner).

This teaches the pruning function cloner to use instsimplify rather than
just the constant folder to fold values during cloning. This can simplify
a large number of things that constant folding alone cannot begin to
touch. For example, it will realize that 'or' and 'and' instructions with
certain constant operands actually become constants regardless of what
their other operand is. It can also thread back through the caller to
perform simplifications that are only possible by looking up a few
levels. In particular, GEPs and pointer testing tend to fold much more
heavily with this change.

This should (in some cases) have a positive impact on compile times with
optimizations on, because the inliner itself will simply avoid cloning a
great deal of code. It already attempted to prune proven-dead code, but
now it will use the stronger simplifications to prove more code dead.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153403 91177308-0d34-0410-b5e6-96231b3b80d8
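As a concrete illustration (a hypothetical snippet, not taken from this patch; %x, %y, and %base are placeholder values), instsimplify can fold cases like the following during cloning, where the constant folder would give up because not every operand is a constant:

  %a = and i32 %x, 0            ; simplifies to 0 regardless of %x
  %o = or i32 %y, -1            ; simplifies to -1 regardless of %y
  %p = getelementptr i32* %base, i32 0
  %c = icmp eq i32* %p, %base   ; simplifies to true once %p folds to %base

Only one operand of each 'and'/'or' is constant here, so ConstantFoldInstOperands cannot touch them, but SimplifyInstruction proves the results anyway.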
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp              37
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp      86
-rw-r--r--  test/Transforms/Inline/inline_cleanup.ll    45
3 files changed, 79 insertions, 89 deletions
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9975333976..048a1a0560 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,7 +19,6 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -335,38 +334,6 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
   return false;
 }
 
-/// \brief Simplify arguments going into a particular callsite.
-///
-/// This is important to do each time we add a callsite due to inlining so that
-/// constants and other entities which feed into inline cost estimation are
-/// properly recognized when analyzing the new callsite. Consider:
-///   void outer(int x) {
-///     if (x < 42)
-///       return inner(42 - x);
-///     ...
-///   }
-///   void inner(int x) {
-///     ...
-///   }
-///
-/// The inliner gives calls to 'outer' with a constant argument a bonus because
-/// it will delete one side of a branch. But the resulting call to 'inner'
-/// will, after inlining, also have a constant operand. We need to do just
-/// enough constant folding to expose this for callsite arguments. The rest
-/// will be taken care of after the inliner finishes running.
-static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
-  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
-  // invalidate operand iterators in any cases.
-  SmallVector<std::pair<Value *, Value*>, 4> SimplifiedArgs;
-  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
-       I != E; ++I)
-    if (Instruction *Inst = dyn_cast<Instruction>(*I))
-      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
-        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
-  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
-    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
-}
-
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -494,9 +461,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
              i != e; ++i) {
           Value *Ptr = InlineInfo.InlinedCalls[i];
-          CallSite NewCS = Ptr;
-          simplifyCallSiteArguments(TD, NewCS);
-          CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
+          CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
         }
       }
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1b28c35238..a83c4e66ca 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include <map>
@@ -218,11 +219,6 @@ namespace {
     /// anything that it can reach.
     void CloneBlock(const BasicBlock *BB,
                     std::vector<const BasicBlock*> &ToClone);
-
-  public:
-    /// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-    /// mapping its operands through VMap if they are available.
-    Constant *ConstantFoldMappedInstruction(const Instruction *I);
   };
 }
@@ -262,19 +258,33 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
   // loop doesn't include the terminator.
   for (BasicBlock::const_iterator II = BB->begin(), IE = --BB->end();
        II != IE; ++II) {
-    // If this instruction constant folds, don't bother cloning the instruction,
-    // instead, just add the constant to the value map.
-    if (Constant *C = ConstantFoldMappedInstruction(II)) {
-      VMap[II] = C;
-      continue;
+    Instruction *NewInst = II->clone();
+
+    // Eagerly remap operands to the newly cloned instruction, except for PHI
+    // nodes for which we defer processing until we update the CFG.
+    if (!isa<PHINode>(NewInst)) {
+      RemapInstruction(NewInst, VMap,
+                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+
+      // If we can simplify this instruction to some other value, simply add
+      // a mapping to that value rather than inserting a new instruction into
+      // the basic block.
+      if (Value *V = SimplifyInstruction(NewInst, TD)) {
+        // On the off-chance that this simplifies to an instruction in the old
+        // function, map it back into the new function.
+        if (Value *MappedV = VMap.lookup(V))
+          V = MappedV;
+
+        VMap[II] = V;
+        delete NewInst;
+        continue;
+      }
     }
-    Instruction *NewInst = II->clone();
     if (II->hasName())
       NewInst->setName(II->getName()+NameSuffix);
-    NewBB->getInstList().push_back(NewInst);
     VMap[II] = NewInst;                // Add instruction map to value.
-
+    NewBB->getInstList().push_back(NewInst);
     hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
       if (isa<ConstantInt>(AI->getArraySize()))
@@ -345,30 +355,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
     Returns.push_back(RI);
 }
 
-/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through VMap if they are available.
-Constant *PruningFunctionCloner::
-ConstantFoldMappedInstruction(const Instruction *I) {
-  SmallVector<Constant*, 8> Ops;
-  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
-    if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
-                                                           VMap,
-                        ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges)))
-      Ops.push_back(Op);
-    else
-      return 0;  // All operands not constant!
-
-  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
-    return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
-                                           TD);
-
-  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
-    if (!LI->isVolatile())
-      return ConstantFoldLoadFromConstPtr(Ops[0], TD);
-
-  return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD);
-}
-
 /// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
 /// except that it does some simple constant prop and DCE on the fly. The
 /// effect of this is to copy significantly less code in cases where (for
@@ -418,25 +404,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
     // Add the new block to the new function.
     NewFunc->getBasicBlockList().push_back(NewBB);
-
-    // Loop over all of the instructions in the block, fixing up operand
-    // references as we go. This uses VMap to do all the hard work.
-    //
-    BasicBlock::iterator I = NewBB->begin();
 
     // Handle PHI nodes specially, as we have to remove references to dead
     // blocks.
-    if (PHINode *PN = dyn_cast<PHINode>(I)) {
-      // Skip over all PHI nodes, remembering them for later.
-      BasicBlock::const_iterator OldI = BI->begin();
-      for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
-        PHIToResolve.push_back(cast<PHINode>(OldI));
-    }
-
-    // Otherwise, remap the rest of the instructions normally.
-    for (; I != NewBB->end(); ++I)
-      RemapInstruction(I, VMap,
-                       ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
+    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I)
+      if (const PHINode *PN = dyn_cast<PHINode>(I))
+        PHIToResolve.push_back(PN);
+      else
+        break;
+
+    // Finally, remap the terminator instructions, as those can't be remapped
+    // until all BBs are mapped.
+    RemapInstruction(NewBB->getTerminator(), VMap,
+                     ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
   }
 
   // Defer PHI resolution until rest of function is resolved, PHI resolution
diff --git a/test/Transforms/Inline/inline_cleanup.ll b/test/Transforms/Inline/inline_cleanup.ll
index b744ae1ef3..7583dddea9 100644
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -71,3 +71,48 @@ entry:
   tail call fastcc void @foo( i32 8 )
   ret void
 }
+
+declare void @f(i32 %x)
+
+define void @inner2(i32 %x, i32 %y, i32 %z) {
+entry:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %then1, label %end1
+
+then1:
+  call void @f(i32 %x)
+  br label %end1
+
+end1:
+  %x2 = and i32 %x, %z
+  %cmp2 = icmp sgt i32 %x2, 1
+  br i1 %cmp2, label %then2, label %end2
+
+then2:
+  call void @f(i32 %x2)
+  br label %end2
+
+end2:
+  %y2 = or i32 %y, %z
+  %cmp3 = icmp sgt i32 %y2, 0
+  br i1 %cmp3, label %then3, label %end3
+
+then3:
+  call void @f(i32 %y2)
+  br label %end3
+
+end3:
+  ret void
+}
+
+define void @outer2(i32 %z) {
+; Ensure that after inlining, none of the blocks with a call to @f actually
+; make it through inlining.
+; CHECK: define void @outer2
+; CHECK-NOT: call
+; CHECK: ret void
+
+entry:
+  call void @inner2(i32 0, i32 -1, i32 %z)
+  ret void
+}
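
For reference, a sketch of the expected outcome (not output captured from an actual test run): with the arguments i32 0 and i32 -1, instsimplify folds %cmp1 = icmp ne i32 0, 0 to false, %x2 = and i32 0, %z to 0 (so %cmp2 = icmp sgt i32 0, 1 is false), and %y2 = or i32 -1, %z to -1 (so %cmp3 = icmp sgt i32 -1, 0 is false) while the call is being inlined. Every block containing a call to @f is therefore pruned during cloning, and @outer2 should reduce to:

  define void @outer2(i32 %z) {
  entry:
    ret void
  }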