-rw-r--r--  include/llvm/Analysis/CodeMetrics.h             |    4
-rw-r--r--  include/llvm/Analysis/InlineCost.h              |  178
-rw-r--r--  include/llvm/Transforms/IPO/InlinerPass.h       |    5
-rw-r--r--  lib/Analysis/CodeMetrics.cpp                    |   88
-rw-r--r--  lib/Analysis/InlineCost.cpp                     | 1441
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp             |    5
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp             |    5
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp                  |   53
-rw-r--r--  test/Transforms/Inline/alloca-bonus.ll          |   41
-rw-r--r--  test/Transforms/Inline/dynamic_alloca_test.ll   |    5
-rw-r--r--  test/Transforms/Inline/inline_constprop.ll      |  123
-rw-r--r--  test/Transforms/Inline/noinline-recursive-fn.ll |   37
-rw-r--r--  test/Transforms/Inline/ptr-diff.ll              |    2
13 files changed, 1189 insertions, 798 deletions
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 033e19bc7f..7116078349 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -20,9 +20,13 @@
namespace llvm {
class BasicBlock;
class Function;
+ class Instruction;
class TargetData;
class Value;
+ /// \brief Check whether an instruction is likely to be "free" when lowered.
+ bool isInstructionFree(const Instruction *I, const TargetData *TD = 0);
+
/// \brief Check whether a call will lower to something small.
///
/// This test checks whether calls to this function will lower to something
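
The new isInstructionFree entry point is shared between CodeMetrics and the
inline cost analysis below. A minimal sketch of its intended use (the
surrounding walk is illustrative, not part of this patch):

  // Count only the instructions expected to survive lowering. Assumes an
  // in-scope BasicBlock *BB and TargetData *TD (which may be null).
  unsigned NumRealInsts = 0;
  for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
    if (!isInstructionFree(I, TD))   // PHIs, no-op casts, debug intrinsics...
      ++NumRealInsts;
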
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index c804c46528..c523890472 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -16,6 +16,7 @@
#include "llvm/Function.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/CodeMetrics.h"
#include <cassert>
@@ -25,162 +26,105 @@
namespace llvm {
class CallSite;
- template<class PtrType, unsigned SmallSize>
- class SmallPtrSet;
class TargetData;
namespace InlineConstants {
// Various magic constants used to adjust heuristics.
const int InstrCost = 5;
- const int IndirectCallBonus = -100;
+ const int IndirectCallThreshold = 100;
const int CallPenalty = 25;
const int LastCallToStaticBonus = -15000;
const int ColdccPenalty = 2000;
const int NoreturnPenalty = 10000;
}
- /// InlineCost - Represent the cost of inlining a function. This
- /// supports special values for functions which should "always" or
- /// "never" be inlined. Otherwise, the cost represents a unitless
- /// amount; smaller values increase the likelihood of the function
- /// being inlined.
+ /// \brief Represents the cost of inlining a function.
+ ///
+ /// This supports special values for functions which should "always" or
+ /// "never" be inlined. Otherwise, the cost represents a unitless amount;
+ /// smaller values increase the likelihood of the function being inlined.
+ ///
+ /// Objects of this type also provide the adjusted threshold for inlining
+ /// based on the information available for a particular callsite. They can be
+ /// directly tested to determine if inlining should occur given the cost and
+ /// threshold for this cost metric.
class InlineCost {
- enum Kind {
- Value,
- Always,
- Never
+ enum CostKind {
+ CK_Variable,
+ CK_Always,
+ CK_Never
};
- // This is a do-it-yourself implementation of
- // int Cost : 30;
- // unsigned Type : 2;
- // We used to use bitfields, but they were sometimes miscompiled (PR3822).
- enum { TYPE_BITS = 2 };
- enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS };
- unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS;
+ const int Cost : 30; // The inlining cost if neither always nor never.
+ const unsigned Kind : 2; // The type of cost, one of CostKind above.
- Kind getType() const {
- return Kind(TypedCost >> COST_BITS);
- }
+ /// \brief The adjusted threshold against which this cost should be tested.
+ const int Threshold;
- int getCost() const {
- // Sign-extend the bottom COST_BITS bits.
- return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS;
+ // Trivial constructor, interesting logic in the factory functions below.
+ InlineCost(int Cost, CostKind Kind, int Threshold)
+ : Cost(Cost), Kind(Kind), Threshold(Threshold) {}
+
+ public:
+ static InlineCost get(int Cost, int Threshold) {
+ InlineCost Result(Cost, CK_Variable, Threshold);
+ assert(Result.Cost == Cost && "Cost exceeds InlineCost precision");
+ return Result;
+ }
+ static InlineCost getAlways() {
+ return InlineCost(0, CK_Always, 0);
+ }
+ static InlineCost getNever() {
+ return InlineCost(0, CK_Never, 0);
}
- InlineCost(int C, int T) {
- TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS);
- assert(getCost() == C && "Cost exceeds InlineCost precision");
+ /// \brief Test whether the inline cost is low enough for inlining.
+ operator bool() const {
+ if (isAlways()) return true;
+ if (isNever()) return false;
+ return Cost < Threshold;
}
- public:
- static InlineCost get(int Cost) { return InlineCost(Cost, Value); }
- static InlineCost getAlways() { return InlineCost(0, Always); }
- static InlineCost getNever() { return InlineCost(0, Never); }
- bool isVariable() const { return getType() == Value; }
- bool isAlways() const { return getType() == Always; }
- bool isNever() const { return getType() == Never; }
+ bool isVariable() const { return Kind == CK_Variable; }
+ bool isAlways() const { return Kind == CK_Always; }
+ bool isNever() const { return Kind == CK_Never; }
- /// getValue() - Return a "variable" inline cost's amount. It is
+ /// getCost() - Return a "variable" inline cost's amount. It is
/// an error to call this on an "always" or "never" InlineCost.
- int getValue() const {
- assert(getType() == Value && "Invalid access of InlineCost");
- return getCost();
+ int getCost() const {
+ assert(Kind == CK_Variable && "Invalid access of InlineCost");
+ return Cost;
+ }
+
+ /// \brief Get the cost delta from the threshold for inlining.
+ /// Only valid if the cost is of the variable kind. Returns a negative
+ /// value if the cost is too high to inline.
+ int getCostDelta() const {
+ return Threshold - getCost();
}
};
/// InlineCostAnalyzer - Cost analyzer used by inliner.
class InlineCostAnalyzer {
- struct ArgInfo {
- public:
- unsigned ConstantWeight;
- unsigned AllocaWeight;
-
- ArgInfo(unsigned CWeight, unsigned AWeight)
- : ConstantWeight(CWeight), AllocaWeight(AWeight)
- {}
- };
-
- struct FunctionInfo {
- CodeMetrics Metrics;
-
- /// ArgumentWeights - Each formal argument of the function is inspected to
- /// see if it is used in any contexts where making it a constant or alloca
- /// would reduce the code size. If so, we add some value to the argument
- /// entry here.
- std::vector<ArgInfo> ArgumentWeights;
-
- /// PointerArgPairWeights - Weights to use when giving an inline bonus to
- /// a call site due to correlated pairs of pointers.
- DenseMap<std::pair<unsigned, unsigned>, unsigned> PointerArgPairWeights;
-
- /// countCodeReductionForConstant - Figure out an approximation for how
- /// many instructions will be constant folded if the specified value is
- /// constant.
- unsigned countCodeReductionForConstant(const CodeMetrics &Metrics,
- Value *V);
-
- /// countCodeReductionForAlloca - Figure out an approximation of how much
- /// smaller the function will be if it is inlined into a context where an
- /// argument becomes an alloca.
- unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics,
- Value *V);
-
- /// countCodeReductionForPointerPair - Count the bonus to apply to an
- /// inline call site where a pair of arguments are pointers and one
- /// argument is a constant offset from the other. The idea is to
- /// recognize a common C++ idiom where a begin and end iterator are
- /// actually pointers, and many operations on the pair of them will be
- /// constants if the function is called with arguments that have
- /// a constant offset.
- void countCodeReductionForPointerPair(
- const CodeMetrics &Metrics,
- DenseMap<Value *, unsigned> &PointerArgs,
- Value *V, unsigned ArgIdx);
-
- /// analyzeFunction - Add information about the specified function
- /// to the current structure.
- void analyzeFunction(Function *F, const TargetData *TD);
-
- /// NeverInline - Returns true if the function should never be
- /// inlined into any caller.
- bool NeverInline();
- };
-
- // The Function* for a function can be changed (by ArgumentPromotion);
- // the ValueMap will update itself when this happens.
- ValueMap<const Function *, FunctionInfo> CachedFunctionInfo;
-
// TargetData if available, or null.
const TargetData *TD;
- int CountBonusForConstant(Value *V, Constant *C = NULL);
- int ConstantFunctionBonus(CallSite CS, Constant *C);
- int getInlineSize(CallSite CS, Function *Callee);
- int getInlineBonuses(CallSite CS, Function *Callee);
public:
InlineCostAnalyzer(): TD(0) {}
void setTargetData(const TargetData *TData) { TD = TData; }
- /// getInlineCost - The heuristic used to determine if we should inline the
- /// function call or not.
+ /// \brief Get an InlineCost object representing the cost of inlining this
+ /// callsite.
///
- InlineCost getInlineCost(CallSite CS);
- /// getCalledFunction - The heuristic used to determine if we should inline
- /// the function call or not. The callee is explicitly specified, to allow
- /// you to calculate the cost of inlining a function via a pointer. The
- /// result assumes that the inlined version will always be used. You should
- /// weight it yourself in cases where this callee will not always be called.
- InlineCost getInlineCost(CallSite CS, Function *Callee);
-
- /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
- /// higher threshold to determine if the function call should be inlined.
- float getInlineFudgeFactor(CallSite CS);
+ /// Note that the threshold is passed into this function. Only costs below the
+ /// threshold are computed with any accuracy. The threshold can be used to
+ /// bound the computation necessary to determine whether the cost is
+ /// sufficiently low to warrant inlining.
+ InlineCost getInlineCost(CallSite CS, int Threshold);
/// resetCachedFunctionInfo - erase any cached cost info for this function.
void resetCachedCostInfo(Function* Caller) {
- CachedFunctionInfo[Caller] = FunctionInfo();
}
/// growCachedCostInfo - update the cached cost info for Caller after Callee
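
The threshold now travels with the InlineCost object, so callers test the
result directly instead of scaling a separately computed fudge factor. A
sketch of the new idiom (Threshold stands for the caller's own policy value,
and TD/CS are assumed in scope):

  InlineCostAnalyzer CA;
  CA.setTargetData(TD);
  InlineCost IC = CA.getInlineCost(CS, Threshold);
  if (!IC)
    return false;               // never-inline, or Cost >= Threshold
  if (IC.isVariable())
    DEBUG(dbgs() << "margin: " << IC.getCostDelta() << "\n");
  return true;                  // always-inline, or Cost < Threshold
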
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index f59479d738..bdc02fff73 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -65,11 +65,6 @@ struct Inliner : public CallGraphSCCPass {
///
virtual InlineCost getInlineCost(CallSite CS) = 0;
- // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
- // higher threshold to determine if the function call should be inlined.
- ///
- virtual float getInlineFudgeFactor(CallSite CS) = 0;
-
/// resetCachedCostInfo - erase any cached cost data from the derived class.
/// If the derived class has no such data this can be empty.
///
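
With getInlineFudgeFactor gone, a concrete inliner folds its threshold policy
into the single getInlineCost query. A sketch modeled on the InlineSimple.cpp
change in this commit (pass boilerplate omitted):

  struct SimpleInliner : public Inliner {
    InlineCostAnalyzer CA;
    InlineCost getInlineCost(CallSite CS) {
      // The per-callsite threshold subsumes the old > 1.0 fudge factor.
      return CA.getInlineCost(CS, getInlineThreshold(CS));
    }
    void resetCachedCostInfo(Function *Caller) {
      CA.resetCachedCostInfo(Caller);
    }
    // growCachedCostInfo and the usual pass overrides are unchanged.
  };
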
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 6c93f78629..316e7bc934 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -50,6 +50,52 @@ bool llvm::callIsSmall(const Function *F) {
return false;
}
+bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
+ if (isa<PHINode>(I))
+ return true;
+
+ // If a GEP has all constant indices, it will probably be folded with
+ // a load/store.
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+ return GEP->hasAllConstantIndices();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ // These intrinsics don't count as size.
+ return true;
+ }
+ }
+
+ if (const CastInst *CI = dyn_cast<CastInst>(I)) {
+ // Noop casts, including ptr <-> int, don't count.
+ if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+ return true;
+ // trunc to a native type is free (assuming the target has compare and
+ // shift-right of the same width).
+ if (TD && isa<TruncInst>(CI) &&
+ TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+ return true;
+ // Result of a cmp instruction is often extended (to be used by other
+ // cmp instructions, logical or return instructions). These are usually
+ // nop on most sane targets.
+ if (isa<CmpInst>(CI->getOperand(0)))
+ return true;
+ }
+
+ return false;
+}
+
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
@@ -58,27 +104,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
- if (isa<PHINode>(II)) continue; // PHI nodes don't count.
+ if (isInstructionFree(II, TD))
+ continue;
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
- if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
- switch (IntrinsicI->getIntrinsicID()) {
- default: break;
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- // These intrinsics don't count as size.
- continue;
- }
- }
-
ImmutableCallSite CS(cast<Instruction>(II));
if (const Function *F = CS.getCalledFunction()) {
@@ -115,28 +145,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
- if (const CastInst *CI = dyn_cast<CastInst>(II)) {
- // Noop casts, including ptr <-> int, don't count.
- if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
- isa<PtrToIntInst>(CI))
- continue;
- // trunc to a native type is free (assuming the target has compare and
- // shift-right of the same width).
- if (isa<TruncInst>(CI) && TD &&
- TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
- continue;
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- continue;
- } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
- // If a GEP has all constant indices, it will probably be folded with
- // a load/store.
- if (GEPI->hasAllConstantIndices())
- continue;
- }
-
++NumInsts;
}
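
CodeMetrics now defers every "free instruction" special case to the shared
helper, so existing callers see the same totals as before. For example
(assuming the existing CodeMetrics interface):

  CodeMetrics Metrics;
  Metrics.analyzeFunction(F, TD);  // walks each block via analyzeBasicBlock
  // Metrics.NumInsts excludes whatever isInstructionFree accepted: PHIs,
  // debug/lifetime intrinsics, no-op casts, all-constant-index GEPs.
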
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dedbfebea7..bc6c1687fd 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -11,659 +11,1014 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "inline-cost"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/CallingConv.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
-unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
- const CodeMetrics &Metrics, Value *V) {
- unsigned Reduction = 0;
- SmallVector<Value *, 4> Worklist;
- Worklist.push_back(V);
- do {
- Value *V = Worklist.pop_back_val();
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- User *U = *UI;
- if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
- // We will be able to eliminate all but one of the successors.
- const TerminatorInst &TI = cast<TerminatorInst>(*U);
- const unsigned NumSucc = TI.getNumSuccessors();
- unsigned Instrs = 0;
- for (unsigned I = 0; I != NumSucc; ++I)
- Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I));
- // We don't know which blocks will be eliminated, so use the average size.
- Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
- continue;
- }
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+ typedef InstVisitor<CallAnalyzer, bool> Base;
+ friend class InstVisitor<CallAnalyzer, bool>;
+
+ // TargetData if available, or null.
+ const TargetData *const TD;
+
+ // The called function.
+ Function &F;
+
+ int Threshold;
+ int Cost;
+ const bool AlwaysInline;
+
+ bool IsRecursive;
+ bool ExposesReturnsTwice;
+ bool HasDynamicAlloca;
+ unsigned NumInstructions, NumVectorInstructions;
+ int FiftyPercentVectorBonus, TenPercentVectorBonus;
+ int VectorBonus;
+
+ // While we walk the potentially-inlined instructions, we build up and
+ // maintain a mapping of simplified values specific to this callsite. The
+ // idea is to propagate any special information we have about arguments to
+ // this call through the inlinable section of the function, and account for
+ // likely simplifications post-inlining. The most important aspect we track
+ // is CFG-altering simplifications -- when we prove a basic block dead, that
+ // can cause dramatic shifts in the cost of inlining a function.
+ DenseMap<Value *, Constant *> SimplifiedValues;
+
+ // Keep track of the values which map back (through function arguments) to
+ // allocas on the caller stack which could be simplified through SROA.
+ DenseMap<Value *, Value *> SROAArgValues;
+
+ // The mapping of caller Alloca values to their accumulated cost savings. If
+ // we have to disable SROA for one of the allocas, this tells us how much
+ // cost must be added.
+ DenseMap<Value *, int> SROAArgCosts;
+
+ // Keep track of values which map to a pointer base and constant offset.
+ DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+ // Custom simplification helper routines.
+ bool isAllocaDerivedArg(Value *V);
+ bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+ DenseMap<Value *, int>::iterator &CostIt);
+ void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+ void disableSROA(Value *V);
+ void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost);
+ bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+ bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+ ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+ // Custom analysis routines.
+ bool analyzeBlock(BasicBlock *BB);
+
+ // Disable several entry points to the visitor so we don't accidentally use
+ // them by declaring but not defining them here.
+ void visit(Module *); void visit(Module &);
+ void visit(Function *); void visit(Function &);
+ void visit(BasicBlock *); void visit(BasicBlock &);
+
+ // Provide base case for our instruction visit.
+ bool visitInstruction(Instruction &I);
+
+ // Our visit overrides.
+ bool visitAlloca(AllocaInst &I);
+ bool visitPHI(PHINode &I);
+ bool visitGetElementPtr(GetElementPtrInst &I);
+ bool visitBitCast(BitCastInst &I);
+ bool visitPtrToInt(PtrToIntInst &I);
+ bool visitIntToPtr(IntToPtrInst &I);
+ bool visitCastInst(CastInst &I);
+ bool visitUnaryInstruction(UnaryInstruction &I);
+ bool visitICmp(ICmpInst &I);
+ bool visitSub(BinaryOperator &I);
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitLoad(LoadInst &I);
+ bool visitStore(StoreInst &I);
+ bool visitCallSite(CallSite CS);
+
+public:
+ CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
+ : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+ AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+ IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ NumInstructions(0), NumVectorInstructions(0),
+ FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+ NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+ NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+ NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
+ }
- // Figure out if this instruction will be removed due to simple constant
- // propagation.
- Instruction &Inst = cast<Instruction>(*U);
-
- // We can't constant propagate instructions which have effects or
- // read memory.
- //
- // FIXME: It would be nice to capture the fact that a load from a
- // pointer-to-constant-global is actually a *really* good thing to zap.
- // Unfortunately, we don't know the pointer that may get propagated here,
- // so we can't make this decision.
- if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
- isa<AllocaInst>(Inst))
- continue;
+ bool analyzeCall(CallSite CS);
- bool AllOperandsConstant = true;
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
- if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
- AllOperandsConstant = false;
- break;
- }
- if (!AllOperandsConstant)
- continue;
+ int getThreshold() { return Threshold; }
+ int getCost() { return Cost; }
- // We will get to remove this instruction...
- Reduction += InlineConstants::InstrCost;
+ // Keep a bunch of stats about the cost savings found so we can print them
+ // out when debugging.
+ unsigned NumConstantArgs;
+ unsigned NumConstantOffsetPtrArgs;
+ unsigned NumAllocaArgs;
+ unsigned NumConstantPtrCmps;
+ unsigned NumConstantPtrDiffs;
+ unsigned NumInstructionsSimplified;
+ unsigned SROACostSavings;
+ unsigned SROACostSavingsLost;
- // And any other instructions that use it which become constants
- // themselves.
- Worklist.push_back(&Inst);
- }
- } while (!Worklist.empty());
- return Reduction;
-}
+ void dump();
+};
-static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
- ICmpInst *ICI) {
- unsigned Reduction = 0;
+} // namespace
- // Bail if this is comparing against a non-constant; there is nothing we can
- // do there.
- if (!isa<Constant>(ICI->getOperand(1)))
- return Reduction;
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+ return SROAArgValues.count(V);
+}
- // An icmp pred (alloca, C) becomes true if the predicate is true when
- // equal and false otherwise.
- bool Result = ICI->isTrueWhenEqual();
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+ Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+ if (SROAArgValues.empty() || SROAArgCosts.empty())
+ return false;
- SmallVector<Instruction *, 4> Worklist;
- Worklist.push_back(ICI);
- do {
- Instruction *U = Worklist.pop_back_val();
- Reduction += InlineConstants::InstrCost;
- for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
- UI != UE; ++UI) {
- Instruction *I = dyn_cast<Instruction>(*UI);
- if (!I || I->mayHaveSideEffects()) continue;
- if (I->getNumOperands() == 1)
- Worklist.push_back(I);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- // If BO produces the same value as U, then the other operand is
- // irrelevant and we can put it into the Worklist to continue
- // deleting dead instructions. If BO produces the same value as the
- // other operand, we can delete BO but that's it.
- if (Result == true) {
- if (BO->getOpcode() == Instruction::Or)
- Worklist.push_back(I);
- if (BO->getOpcode() == Instruction::And)
- Reduction += InlineConstants::InstrCost;
- } else {
- if (BO->getOpcode() == Instruction::Or ||
- BO->getOpcode() == Instruction::Xor)
- Reduction += InlineConstants::InstrCost;
- if (BO->getOpcode() == Instruction::And)
- Worklist.push_back(I);
- }
- }
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
- BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
- if (BB->getSinglePredecessor())
- Reduction
- += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
- }
- }
- } while (!Worklist.empty());
+ DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+ if (ArgIt == SROAArgValues.end())
+ return false;
- return Reduction;
+ Arg = ArgIt->second;
+ CostIt = SROAArgCosts.find(Arg);
+ return CostIt != SROAArgCosts.end();
}
-/// \brief Compute the reduction possible for a given instruction if we are able
-/// to SROA an alloca.
+/// \brief Disable SROA for the candidate marked by this cost iterator.
///
-/// The reduction for this instruction is added to the SROAReduction output
-/// parameter. Returns false if this instruction is expected to defeat SROA in
-/// general.
-static bool countCodeReductionForSROAInst(Instruction *I,
- SmallVectorImpl<Value *> &Worklist,
- unsigned &SROAReduction) {
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!LI->isSimple())
- return false;
- SROAReduction += InlineConstants::InstrCost;
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+ // If we're no longer able to perform SROA we need to undo its cost savings
+ // and prevent subsequent analysis.
+ Cost += CostIt->second;
+ SROACostSavings -= CostIt->second;
+ SROACostSavingsLost += CostIt->second;
+ SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+ disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ CostIt->second += InstructionCost;
+ SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+ DenseMap<Value *, int>::iterator CostIt,
+ int InstructionCost) {
+ if (IsSROAValid) {
+ accumulateSROACost(CostIt, InstructionCost);
return true;
}
- if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!SI->isSimple())
+ disableSROA(CostIt);
+ return false;
+}
+
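// Sketch (not one of the patch hunks): how the instruction visitors drive
// these helpers, modeled on the load handling declared earlier in this file.
bool CallAnalyzer::visitLoad(LoadInst &I) {
  Value *SROAArg;
  DenseMap<Value *, int>::iterator CostIt;
  if (lookupSROAArgAndCost(I.getPointerOperand(), SROAArg, CostIt))
    // A simple load of an SROA-able alloca is free; anything else (volatile,
    // atomic) kills the candidate and restores its accumulated savings.
    return handleSROACandidate(I.isSimple(), CostIt,
                               InlineConstants::InstrCost);
  return false;
}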
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+ for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+ if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
return false;
- SROAReduction += InlineConstants::InstrCost;
- return true;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // If the GEP has variable indices, we won't be able to do much with it.
- if (!GEP->hasAllConstantIndices())
+ return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+ if (!TD)
+ return false;
+
+ unsigned IntPtrWidth = TD->getPointerSizeInBits();
+ assert(IntPtrWidth == Offset.getBitWidth());
+
+ for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+ GTI != GTE; ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+ if (!OpC)
+ if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+ OpC = dyn_cast<ConstantInt>(SimpleOp);
+ if (!OpC)
return false;
- // A non-zero GEP will likely become a mask operation after SROA.
- if (GEP->hasAllZeroIndices())
- SROAReduction += InlineConstants::InstrCost;
- Worklist.push_back(GEP);
- return true;
- }
+ if (OpC->isZero()) continue;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
- // Track pointer through bitcasts.
- Worklist.push_back(BCI);
- SROAReduction += InlineConstants::InstrCost;
- return true;
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ unsigned ElementIdx = OpC->getZExtValue();
+ const StructLayout *SL = TD->getStructLayout(STy);
+ Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+ continue;
+ }
+
+ APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+ Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
}
+ return true;
+}
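// Worked example for the accumulation above (illustrative types; 64-bit
// pointers and x86-64-style layout assumed):
//   struct S { int a; int b; double c; };        // alloc size 16, c at +8
//   %p = getelementptr inbounds %struct.S* %base, i64 1, i32 2
// First index:  1 * getTypeAllocSize(S)          -> 16 bytes
// Second index: StructLayout offset of field 2   ->  8 bytes
// Offset becomes 24, and %p lands in ConstantOffsetPtrs as (%base, 24)
// for later comparison and pointer-difference folding.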
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+ // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // alloca, and handle that case.
- // We just look for non-constant operands to ICmp instructions as those will
- // defeat SROA. The actual reduction for these happens even without SROA.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
- return isa<Constant>(ICI->getOperand(1));
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
- // SROA can handle a select of alloca iff all uses of the alloca are
- // loads, and dereferenceable. We assume it's dereferenceable since
- // we're told the input is an alloca.
- for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
- UI != UE; ++UI) {
- LoadInst *LI = dyn_cast<LoadInst>(*UI);
- if (LI == 0 || !LI->isSimple())
+ // We will happily inline static alloca instructions or dynamic alloca
+ // instructions in always-inline situations.
+ if (AlwaysInline || I.isStaticAlloca())
+ return Base::visitAlloca(I);
+
+ // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+ // a variety of reasons, and so we would like to not inline them into
+ // functions which don't currently have a dynamic alloca. This simply
+ // disables inlining altogether in the presence of a dynamic alloca.
+ HasDynamicAlloca = true;
+ return false;
+}
+
+bool CallAnalyzer::visitPHI(PHINode &I) {
+ // FIXME: We should potentially be tracking values through phi nodes,
+ // especially when they collapse to a single value due to deleted CFG edges
+ // during inlining.
+
+ // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+ // though we don't want to propagate its bonuses. The idea is to disable
+ // SROA if it *might* be used in an inappropriate manner.
+
+ // Phi nodes are always zero-cost.
+ return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+ Value *SROAArg;
+ DenseMap<Value *, int>::iterator CostIt;
+ bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+ SROAArg, CostIt);
+
+ // Try to fold GEPs of constant-offset call site argument pointers. This
+ // requires target data and inbounds GEPs.
+ if (TD && I.isInBounds()) {
+ // Check if we have a base + offset for the pointer.
+ Value *Ptr = I.getPointerOperand();
+ std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+ if (BaseAndOffset.first) {
+ // Check if the offset of this GEP is constant, and if so accumulate it
+ // into Offset.
+ if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+ // Non-constant GEPs aren't folded, and disable SROA.
+ if (SROACandidate)
+ disableSROA(CostIt);
return false;
+ }