aboutsummaryrefslogtreecommitdiff
path: root/lib/Analysis/InlineCost.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Analysis/InlineCost.cpp')
-rw-r--r--lib/Analysis/InlineCost.cpp1441
1 files changed, 898 insertions, 543 deletions
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dedbfebea7..bc6c1687fd 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -11,659 +11,1014 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "inline-cost"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/CallingConv.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
-unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
- const CodeMetrics &Metrics, Value *V) {
- unsigned Reduction = 0;
- SmallVector<Value *, 4> Worklist;
- Worklist.push_back(V);
- do {
- Value *V = Worklist.pop_back_val();
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
- // We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(*U);
- const unsigned NumSucc = TI.getNumSuccessors();
- unsigned Instrs = 0;
- for (unsigned I = 0; I != NumSucc; ++I)
- Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I));
- // We don't know which blocks will be eliminated, so use the average size.
- Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- continue;
- }
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ // TargetData if available, or null.
+ const TargetData *const TD;
+
+ // The called function.
+ Function &F;
+
+ int Threshold;
+ int Cost;
+ const bool AlwaysInline;
+
+ bool IsRecursive;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+ // likely simplifications post-inlining. The most important aspect we track
+ // is CFG altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitICmp(ICmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitCallSite(CallSite CS);
+
+public:
+ CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
+ : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+ AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+ IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
+ }
- // Figure out if this instruction will be removed due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
+ bool analyzeCall(CallSite CS);
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
- if (!AllOperandsConstant)
- continue;
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
- // We will get to remove this instruction...
- Reduction += InlineConstants::InstrCost;
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
- // And any other instructions that use it which become constants
- // themselves.
- Worklist.push_back(&Inst);
- }
- } while (!Worklist.empty());
- return Reduction;
-}
+ void dump();
+};
-static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
- ICmpInst *ICI) {
- unsigned Reduction = 0;
+} // namespace
- // Bail if this is comparing against a non-constant; there is nothing we can
- // do there.
- if (!isa<Constant>(ICI->getOperand(1)))
- return Reduction;
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
+}
- // An icmp pred (alloca, C) becomes true if the predicate is true when
- // equal and false otherwise.
- bool Result = ICI->isTrueWhenEqual();
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
- SmallVector<Instruction *, 4> Worklist;
- Worklist.push_back(ICI);
- do {
- Instruction *U = Worklist.pop_back_val();
- Reduction += InlineConstants::InstrCost;
- for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
- UI != UE; ++UI) {
- Instruction *I = dyn_cast<Instruction>(*UI);
- if (!I || I->mayHaveSideEffects()) continue;
- if (I->getNumOperands() == 1)
- Worklist.push_back(I);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- // If BO produces the same value as U, then the other operand is
- // irrelevant and we can put it into the Worklist to continue
- // deleting dead instructions. If BO produces the same value as the
- // other operand, we can delete BO but that's it.
- if (Result == true) {
- if (BO->getOpcode() == Instruction::Or)
- Worklist.push_back(I);
- if (BO->getOpcode() == Instruction::And)
- Reduction += InlineConstants::InstrCost;
- } else {
- if (BO->getOpcode() == Instruction::Or ||
- BO->getOpcode() == Instruction::Xor)
- Reduction += InlineConstants::InstrCost;
- if (BO->getOpcode() == Instruction::And)
- Worklist.push_back(I);
- }
- }
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
- if (BB->getSinglePredecessor())
- Reduction
- += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
- }
- }
- } while (!Worklist.empty());
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
- return Reduction;
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
}
-/// \brief Compute the reduction possible for a given instruction if we are able
-/// to SROA an alloca.
+/// \brief Disable SROA for the candidate marked by this cost iterator.
///
-/// The reduction for this instruction is added to the SROAReduction output
-/// parameter. Returns false if this instruction is expected to defeat SROA in
-/// general.
-static bool countCodeReductionForSROAInst(Instruction *I,
- SmallVectorImpl<Value *> &Worklist,
- unsigned &SROAReduction) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!LI->isSimple())
- return false;
- SROAReduction += InlineConstants::InstrCost;
+/// This markes the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ if (IsSROAValid) {
+ accumulateSROACost(CostIt, InstructionCost);
return true;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!SI->isSimple())
+ disableSROA(CostIt);
+ return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
return false;
- SROAReduction += InlineConstants::InstrCost;
- return true;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (!GEP->hasAllConstantIndices())
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ if (!TD)
+ return false;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
return false;
- // A non-zero GEP will likely become a mask operation after SROA.
- if (GEP->hasAllZeroIndices())
- SROAReduction += InlineConstants::InstrCost;
- Worklist.push_back(GEP);
- return true;
- }
+ if (OpC->isZero()) continue;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Worklist.push_back(BCI);
- SROAReduction += InlineConstants::InstrCost;
- return true;
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
+ return true;
+}
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
- // We just look for non-constant operands to ICmp instructions as those will
- // defeat SROA. The actual reduction for these happens even without SROA.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
- return isa<Constant>(ICI->getOperand(1));
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- // SROA can handle a select of alloca iff all uses of the alloca are
- // loads, and dereferenceable. We assume it's dereferenceable since
- // we're told the input is an alloca.
- for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple())
+ // We will happily inline tatic alloca instructions or dynamic alloca
+ // instructions in always-inline situations.
+ if (AlwaysInline || I.isStaticAlloca())
+ return Base::visitAlloca(I);
+
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
+}
+
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
+
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+ // though we don't want to propagate it's bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
+
+ // Phi nodes are always zero-cost.
+ return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (TD && I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded, and disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
return false;
+ }
+
+ // Add the result as a new mapping to Base + Offset.
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also handle SROA candidates here, we already know that the GEP is
+ // all-constant indexed.
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ return true;
}
- // We don't know whether we'll be deleting the rest of the chain of
- // instructions from the SelectInst on, because we don't know whether
- // the other side of the select is also an alloca or not.
+ }
+
+ if (isGEPOffsetConstant(I)) {
+ if (SROACandidate)
+ SROAArgValues[&I] = SROAArg;
+
+ // Constant GEPs are modeled as free.
return true;
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::memset:
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // SROA can usually chew through these intrinsics.
- SROAReduction += InlineConstants::InstrCost;
+ // Variable GEPs will require math and will disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
+ return false;
+}
+
+bool CallAnalyzer::visitBitCast(BitCastInst &I) {
+ // Propagate constants through bitcasts.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
return true;
}
+
+ // Track base/offsets through casts
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ // Casts don't change the offset, just wrap it up.
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+
+ // Also look for SROA candidates here.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // Bitcasts are always zero cost.
+ return true;
+}
+
+bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
+ // Propagate constants through ptrtoint.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
+
+ // Track base/offset pairs when converted to a plain integer provided the
+ // integer is large enough to represent the pointer.
+ unsigned IntegerSize = I.getType()->getScalarSizeInBits();
+ if (TD && IntegerSize >= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset
+ = ConstantOffsetPtrs.lookup(I.getOperand(0));
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
}
- // If there is some other strange instruction, we're not going to be
- // able to do much if we inline this.
- return false;
+ // This is really weird. Technically, ptrtoint will disable SROA. However,
+ // unless that ptrtoint is *used* somewhere in the live basic blocks after
+ // inlining, it will be nuked, and SROA should proceed. All of the uses which
+ // would block SROA would also block SROA if applied directly to a pointer,
+ // and so we can just add the integer in here. The only places where SROA is
+ // preserved either cannot fire on an integer, or won't in-and-of themselves
+ // disable SROA (ext) w/o some later use that we would see and disable.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
+
+ // A ptrtoint cast is free so long as the result is large enough to store the
+ // pointer, and a legal integer type.
+ return TD && TD->isLegalInteger(IntegerSize) &&
+ IntegerSize >= TD->getPointerSizeInBits();
}
-unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForAlloca(
- const CodeMetrics &Metrics, Value *V) {
- if (!V->getType()->isPointerTy()) return 0; // Not a pointer
- unsigned Reduction = 0;
- unsigned SROAReduction = 0;
- bool CanSROAAlloca = true;
+bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
+ // Propagate constants through ptrtoint.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- SmallVector<Value *, 4> Worklist;
- Worklist.push_back(V);
- do {
- Value *V = Worklist.pop_back_val();
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI){
- Instruction *I = cast<Instruction>(*UI);
+ // Track base/offset pairs when round-tripped through a pointer without
+ // modifications provided the integer is not too large.
+ Value *Op = I.getOperand(0);
+ unsigned IntegerSize = Op->getType()->getScalarSizeInBits();
+ if (TD && IntegerSize <= TD->getPointerSizeInBits()) {
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Op);
+ if (BaseAndOffset.first)
+ ConstantOffsetPtrs[&I] = BaseAndOffset;
+ }
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
- Reduction += countCodeReductionForAllocaICmp(Metrics, ICI);
+ // "Propagate" SROA here in the same manner as we do for ptrtoint above.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(Op, SROAArg, CostIt))
+ SROAArgValues[&I] = SROAArg;
- if (CanSROAAlloca)
- CanSROAAlloca = countCodeReductionForSROAInst(I, Worklist,
- SROAReduction);
+ // An inttoptr cast is free so long as the input is a legal integer type
+ // which doesn't contain values outside the range of a pointer.
+ return TD && TD->isLegalInteger(IntegerSize) &&
+ IntegerSize <= TD->getPointerSizeInBits();
+}
+
+bool CallAnalyzer::visitCastInst(CastInst &I) {
+ // Propagate constants through ptrtoint.
+ if (Constant *COp = dyn_cast<Constant>(I.getOperand(0)))
+ if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) {
+ SimplifiedValues[&I] = C;
+ return true;
}
- } while (!Worklist.empty());
- return Reduction + (CanSROAAlloca ? SROAReduction : 0);
+ // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere.
+ disableSROA(I.getOperand(0));
+
+ // No-op casts don't have any cost.
+ if (I.isLosslessCast())
+ return true;
+
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (TD && isa<TruncInst>(I) &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(I.getType())))
+ return true;
+
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // no-ops on most sane targets.
+ if (isa<CmpInst>(I.getOperand(0)))
+ return true;
+
+ // Assume the rest of the casts require work.
+ return false;
}
-void InlineCostAnalyzer::FunctionInfo::countCodeReductionForPointerPair(
- const CodeMetrics &Metrics, DenseMap<Value *, unsigned> &PointerArgs,
- Value *V, unsigned ArgIdx) {
- SmallVector<Value *, 4> Worklist;
- Worklist.push_back(V);
- do {
- Value *V = Worklist.pop_back_val();
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
- UI != E; ++UI){
- Instruction *I = cast<Instruction>(*UI);
-
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (!GEP->hasAllConstantIndices())
- continue;
- // Unless the GEP is in-bounds, some comparisons will be non-constant.
- // Fortunately, the real-world cases where this occurs uses in-bounds
- // GEPs, and so we restrict the optimization to them here.
- if (!GEP->isInBounds())
- continue;
+bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
+ Value *Operand = I.getOperand(0);
+ Constant *Ops[1] = { dyn_cast<Constant>(Operand) };
+ if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand)))
+ if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(),
+ Ops, TD)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- // Constant indices just change the constant offset. Add the resulting
- // value both to our worklist for this argument, and to the set of
- // viable paired values with future arguments.
- PointerArgs[GEP] = ArgIdx;
- Worklist.push_back(GEP);
- continue;
+ // Disable any SROA on the argument to arbitrary unary operators.
+ disableSROA(Operand);
+
+ return false;
+}
+
+bool CallAnalyzer::visitICmp(ICmpInst &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ // First try to handle simplified comparisons.
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ return true;
}
- // Track pointer through casts. Even when the result is not a pointer, it
- // remains a constant relative to constants derived from other constant
- // pointers.
- if (CastInst *CI = dyn_cast<CastInst>(I)) {
- PointerArgs[CI] = ArgIdx;
- Worklist.push_back(CI);
- continue;
+ // Otherwise look for a comparison between constant offset pointers with
+ // a common base.
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the icmp to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrCmps;
+ return true;
}
+ }
+ }
- // There are two instructions which produce a strict constant value when
- // applied to two related pointer values. Ignore everything else.
- if (!isa<ICmpInst>(I) && I->getOpcode() != Instruction::Sub)
- continue;
- assert(I->getNumOperands() == 2);
-
- // Ensure that the two operands are in our set of potentially paired
- // pointers (or are derived from them).
- Value *OtherArg = I->getOperand(0);
- if (OtherArg == V)
- OtherArg = I->getOperand(1);
- DenseMap<Value *, unsigned>::const_iterator ArgIt
- = PointerArgs.find(OtherArg);
- if (ArgIt == PointerArgs.end())
- continue;
- std::pair<unsigned, unsigned> ArgPair(ArgIt->second, ArgIdx);
- if (ArgPair.first > ArgPair.second)
- std::swap(ArgPair.first, ArgPair.second);
+ // If the comparison is an equality comparison with null, we can simplify it
+ // for any alloca-derived argument.
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)))
+ if (isAllocaDerivedArg(I.getOperand(0))) {
+ // We can actually predict the result of comparisons between an
+ // alloca-derived value and null. Note that this fires regardless of
+ // SROA firing.
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
- PointerArgPairWeights[ArgPair]
- += countCodeReductionForConstant(Metrics, I);
+ // Finally check for SROA candidates in comparisons.
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (isa<ConstantPointerNull>(I.getOperand(1))) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
}
- } while (!Worklist.empty());
-}
-/// analyzeFunction - Fill in the current structure with information gleaned
-/// from the specified function.
-void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F,
- const TargetData *TD) {
- Metrics.analyzeFunction(F, TD);
-
- // A function with exactly one return has it removed during the inlining
- // process (see InlineFunction), so don't count it.
- // FIXME: This knowledge should really be encoded outside of FunctionInfo.
- if (Metrics.NumRets==1)
- --Metrics.NumInsts;
-
- ArgumentWeights.reserve(F->arg_size());
- DenseMap<Value *, unsigned> PointerArgs;
- unsigned ArgIdx = 0;
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;
- ++I, ++ArgIdx) {
- // Count how much code can be eliminated if one of the arguments is
- // a constant or an alloca.
- ArgumentWeights.push_back(ArgInfo(countCodeReductionForConstant(Metrics, I),
- countCodeReductionForAlloca(Metrics, I)));
-
- // If the argument is a pointer, also check for pairs of pointers where
- // knowing a fixed offset between them allows simplification. This pattern
- // arises mostly due to STL algorithm patterns where pointers are used as
- // random access iterators.
- if (!I->getType()->isPointerTy())
- continue;
- PointerArgs[I] = ArgIdx;
- countCodeReductionForPointerPair(Metrics, PointerArgs, I, ArgIdx);
+ disableSROA(CostIt);
}
-}
-/// NeverInline - returns true if the function should never be inlined into
-/// any caller
-bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
- return (Metrics.exposesReturnsTwice || Metrics.isRecursive ||
- Metrics.containsIndirectBr);
+ return false;
}
-// ConstantFunctionBonus - Figure out how much of a bonus we can get for
-// possibly devirtualizing a function. We'll subtract the size of the function
-// we may wish to inline from the indirect call bonus providing a limit on
-// growth. Leave an upper limit of 0 for the bonus - we don't want to penalize
-// inlining because we decide we don't want to give a bonus for
-// devirtualizing.
-int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+bool CallAnalyzer::visitSub(BinaryOperator &I) {
+ // Try to handle a special case: we can fold computing the difference of two
+ // constant-related pointers.
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *LHSBase, *RHSBase;
+ APInt LHSOffset, RHSOffset;
+ llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS);
+ if (LHSBase) {
+ llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS);
+ if (RHSBase && LHSBase == RHSBase) {
+ // We have common bases, fold the subtract to a constant based on the
+ // offsets.
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset);
+ Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset);
+ if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) {
+ SimplifiedValues[&I] = C;
+ ++NumConstantPtrDiffs;
+ return true;
+ }
+ }
+ }
- // This could just be NULL.
- if (!C) return 0;
+ // Otherwise, fall back to the generic logic for simplifying and handling
+ // instructions.
+ return Base::visitSub(I);
+}
+
+bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (!isa<Constant>(LHS))
+ if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS))
+ LHS = SimpleLHS;
+ if (!isa<Constant>(RHS))
+ if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS))
+ RHS = SimpleRHS;
+ Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD);
+ if (Constant *C = dyn_cast_or_null<Constant>(SimpleV)) {
+ SimplifiedValues[&I] = C;
+ return true;
+ }
- Function *F = dyn_cast<Function>(C);
- if (!F) return 0;
+ // Disable any SROA on arguments to arbitrary, unsimplified binary operators.
+ disableSROA(LHS);
+ disableSROA(RHS);
- int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
- return (Bonus > 0) ? 0 : Bonus;
+ return false;
}
-// CountBonusForConstant - Figure out an approximation for how much per-call
-// performance boost we can expect if the specified value is constant.
-int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
- unsigned Bonus = 0;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (CallInst *CI = dyn_cast<CallInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (CI->getCalledValue() == V)
- Bonus += ConstantFunctionBonus(CallSite(CI), C);
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
- // Turning an indirect call into a direct call is a BIG win
- if (II->getCalledValue() == V)
- Bonus += ConstantFunctionBonus(CallSite(II), C);
+bool CallAnalyzer::visitLoad(LoadInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
}
- // FIXME: Eliminating conditional branches and switches should
- // also yield a per-call performance boost.
- else {
- // Figure out the bonuses that wll accrue due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
+ disableSROA(CostIt);
+ }
- if (AllOperandsConstant)
- Bonus += CountBonusForConstant(&Inst);
+ return false;
+}
+
+bool CallAnalyzer::visitStore(StoreInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) {
+ if (I.isSimple()) {
+ accumulateSROACost(CostIt, InlineConstants::InstrCost);
+ return true;
}
+
+ disableSROA(CostIt);
}
- return Bonus;
+ return false;
}
-int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
- // Get information about the callee.
- FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
-
- // If we haven't calculated this information yet, do so now.
- if (CalleeFI->Metrics.NumBlocks == 0)
- CalleeFI->analyzeFunction(Callee, TD);
-
- // InlineCost - This value measures how good of an inline candidate this call
- // site is to inline. A lower inline cost make is more likely for the call to
- // be inlined. This value may go negative.
- //
- int InlineCost = 0;