diff options
-rw-r--r-- | include/llvm/Analysis/CodeMetrics.h | 4 | ||||
-rw-r--r-- | include/llvm/Analysis/InlineCost.h | 178 | ||||
-rw-r--r-- | include/llvm/Transforms/IPO/InlinerPass.h | 5 | ||||
-rw-r--r-- | lib/Analysis/CodeMetrics.cpp | 88 | ||||
-rw-r--r-- | lib/Analysis/InlineCost.cpp | 1441 | ||||
-rw-r--r-- | lib/Transforms/IPO/InlineAlways.cpp | 5 | ||||
-rw-r--r-- | lib/Transforms/IPO/InlineSimple.cpp | 5 | ||||
-rw-r--r-- | lib/Transforms/IPO/Inliner.cpp | 53 | ||||
-rw-r--r-- | test/Transforms/Inline/alloca-bonus.ll | 41 | ||||
-rw-r--r-- | test/Transforms/Inline/dynamic_alloca_test.ll | 5 | ||||
-rw-r--r-- | test/Transforms/Inline/inline_constprop.ll | 123 | ||||
-rw-r--r-- | test/Transforms/Inline/noinline-recursive-fn.ll | 37 | ||||
-rw-r--r-- | test/Transforms/Inline/ptr-diff.ll | 2 |
13 files changed, 1189 insertions, 798 deletions
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h index 033e19bc7f..7116078349 100644 --- a/include/llvm/Analysis/CodeMetrics.h +++ b/include/llvm/Analysis/CodeMetrics.h @@ -20,9 +20,13 @@ namespace llvm { class BasicBlock; class Function; + class Instruction; class TargetData; class Value; + /// \brief Check whether an instruction is likely to be "free" when lowered. + bool isInstructionFree(const Instruction *I, const TargetData *TD = 0); + /// \brief Check whether a call will lower to something small. /// /// This tests checks whether calls to this function will lower to something diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index c804c46528..c523890472 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -16,6 +16,7 @@ #include "llvm/Function.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/ValueMap.h" #include "llvm/Analysis/CodeMetrics.h" #include <cassert> @@ -25,162 +26,105 @@ namespace llvm { class CallSite; - template<class PtrType, unsigned SmallSize> - class SmallPtrSet; class TargetData; namespace InlineConstants { // Various magic constants used to adjust heuristics. const int InstrCost = 5; - const int IndirectCallBonus = -100; + const int IndirectCallThreshold = 100; const int CallPenalty = 25; const int LastCallToStaticBonus = -15000; const int ColdccPenalty = 2000; const int NoreturnPenalty = 10000; } - /// InlineCost - Represent the cost of inlining a function. This - /// supports special values for functions which should "always" or - /// "never" be inlined. Otherwise, the cost represents a unitless - /// amount; smaller values increase the likelihood of the function - /// being inlined. + /// \brief Represents the cost of inlining a function. + /// + /// This supports special values for functions which should "always" or + /// "never" be inlined. Otherwise, the cost represents a unitless amount; + /// smaller values increase the likelihood of the function being inlined. + /// + /// Objects of this type also provide the adjusted threshold for inlining + /// based on the information available for a particular callsite. They can be + /// directly tested to determine if inlining should occur given the cost and + /// threshold for this cost metric. class InlineCost { - enum Kind { - Value, - Always, - Never + enum CostKind { + CK_Variable, + CK_Always, + CK_Never }; - // This is a do-it-yourself implementation of - // int Cost : 30; - // unsigned Type : 2; - // We used to use bitfields, but they were sometimes miscompiled (PR3822). - enum { TYPE_BITS = 2 }; - enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS }; - unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS; + const int Cost : 30; // The inlining cost if neither always nor never. + const unsigned Kind : 2; // The type of cost, one of CostKind above. - Kind getType() const { - return Kind(TypedCost >> COST_BITS); - } + /// \brief The adjusted threshold against which this cost should be tested. + const int Threshold; - int getCost() const { - // Sign-extend the bottom COST_BITS bits. - return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS; + // Trivial constructor, interesting logic in the factory functions below. + InlineCost(int Cost, CostKind Kind, int Threshold) + : Cost(Cost), Kind(Kind), Threshold(Threshold) {} + + public: + static InlineCost get(int Cost, int Threshold) { + InlineCost Result(Cost, CK_Variable, Threshold); + assert(Result.Cost == Cost && "Cost exceeds InlineCost precision"); + return Result; + } + static InlineCost getAlways() { + return InlineCost(0, CK_Always, 0); + } + static InlineCost getNever() { + return InlineCost(0, CK_Never, 0); } - InlineCost(int C, int T) { - TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS); - assert(getCost() == C && "Cost exceeds InlineCost precision"); + /// \brief Test whether the inline cost is low enough for inlining. + operator bool() const { + if (isAlways()) return true; + if (isNever()) return false; + return Cost < Threshold; } - public: - static InlineCost get(int Cost) { return InlineCost(Cost, Value); } - static InlineCost getAlways() { return InlineCost(0, Always); } - static InlineCost getNever() { return InlineCost(0, Never); } - bool isVariable() const { return getType() == Value; } - bool isAlways() const { return getType() == Always; } - bool isNever() const { return getType() == Never; } + bool isVariable() const { return Kind == CK_Variable; } + bool isAlways() const { return Kind == CK_Always; } + bool isNever() const { return Kind == CK_Never; } - /// getValue() - Return a "variable" inline cost's amount. It is + /// getCost() - Return a "variable" inline cost's amount. It is /// an error to call this on an "always" or "never" InlineCost. - int getValue() const { - assert(getType() == Value && "Invalid access of InlineCost"); - return getCost(); + int getCost() const { + assert(Kind == CK_Variable && "Invalid access of InlineCost"); + return Cost; + } + + /// \brief Get the cost delta from the threshold for inlining. + /// Only valid if the cost is of the variable kind. Returns a negative + /// value if the cost is too high to inline. + int getCostDelta() const { + return Threshold - getCost(); } }; /// InlineCostAnalyzer - Cost analyzer used by inliner. class InlineCostAnalyzer { - struct ArgInfo { - public: - unsigned ConstantWeight; - unsigned AllocaWeight; - - ArgInfo(unsigned CWeight, unsigned AWeight) - : ConstantWeight(CWeight), AllocaWeight(AWeight) - {} - }; - - struct FunctionInfo { - CodeMetrics Metrics; - - /// ArgumentWeights - Each formal argument of the function is inspected to - /// see if it is used in any contexts where making it a constant or alloca - /// would reduce the code size. If so, we add some value to the argument - /// entry here. - std::vector<ArgInfo> ArgumentWeights; - - /// PointerArgPairWeights - Weights to use when giving an inline bonus to - /// a call site due to correlated pairs of pointers. - DenseMap<std::pair<unsigned, unsigned>, unsigned> PointerArgPairWeights; - - /// countCodeReductionForConstant - Figure out an approximation for how - /// many instructions will be constant folded if the specified value is - /// constant. - unsigned countCodeReductionForConstant(const CodeMetrics &Metrics, - Value *V); - - /// countCodeReductionForAlloca - Figure out an approximation of how much - /// smaller the function will be if it is inlined into a context where an - /// argument becomes an alloca. - unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics, - Value *V); - - /// countCodeReductionForPointerPair - Count the bonus to apply to an - /// inline call site where a pair of arguments are pointers and one - /// argument is a constant offset from the other. The idea is to - /// recognize a common C++ idiom where a begin and end iterator are - /// actually pointers, and many operations on the pair of them will be - /// constants if the function is called with arguments that have - /// a constant offset. - void countCodeReductionForPointerPair( - const CodeMetrics &Metrics, - DenseMap<Value *, unsigned> &PointerArgs, - Value *V, unsigned ArgIdx); - - /// analyzeFunction - Add information about the specified function - /// to the current structure. - void analyzeFunction(Function *F, const TargetData *TD); - - /// NeverInline - Returns true if the function should never be - /// inlined into any caller. - bool NeverInline(); - }; - - // The Function* for a function can be changed (by ArgumentPromotion); - // the ValueMap will update itself when this happens. - ValueMap<const Function *, FunctionInfo> CachedFunctionInfo; - // TargetData if available, or null. const TargetData *TD; - int CountBonusForConstant(Value *V, Constant *C = NULL); - int ConstantFunctionBonus(CallSite CS, Constant *C); - int getInlineSize(CallSite CS, Function *Callee); - int getInlineBonuses(CallSite CS, Function *Callee); public: InlineCostAnalyzer(): TD(0) {} void setTargetData(const TargetData *TData) { TD = TData; } - /// getInlineCost - The heuristic used to determine if we should inline the - /// function call or not. + /// \brief Get an InlineCost object representing the cost of inlining this + /// callsite. /// - InlineCost getInlineCost(CallSite CS); - /// getCalledFunction - The heuristic used to determine if we should inline - /// the function call or not. The callee is explicitly specified, to allow - /// you to calculate the cost of inlining a function via a pointer. The - /// result assumes that the inlined version will always be used. You should - /// weight it yourself in cases where this callee will not always be called. - InlineCost getInlineCost(CallSite CS, Function *Callee); - - /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a - /// higher threshold to determine if the function call should be inlined. - float getInlineFudgeFactor(CallSite CS); + /// Note that threshold is passed into this function. Only costs below the + /// threshold are computed with any accuracy. The threshold can be used to + /// bound the computation necessary to determine whether the cost is + /// sufficiently low to warrant inlining. + InlineCost getInlineCost(CallSite CS, int Threshold); /// resetCachedFunctionInfo - erase any cached cost info for this function. void resetCachedCostInfo(Function* Caller) { - CachedFunctionInfo[Caller] = FunctionInfo(); } /// growCachedCostInfo - update the cached cost info for Caller after Callee diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index f59479d738..bdc02fff73 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -65,11 +65,6 @@ struct Inliner : public CallGraphSCCPass { /// virtual InlineCost getInlineCost(CallSite CS) = 0; - // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a - // higher threshold to determine if the function call should be inlined. - /// - virtual float getInlineFudgeFactor(CallSite CS) = 0; - /// resetCachedCostInfo - erase any cached cost data from the derived class. /// If the derived class has no such data this can be empty. /// diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 6c93f78629..316e7bc934 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -50,6 +50,52 @@ bool llvm::callIsSmall(const Function *F) { return false; } +bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) { + if (isa<PHINode>(I)) + return true; + + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + return GEP->hasAllConstantIndices(); + + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + switch (II->getIntrinsicID()) { + default: + return false; + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't count as size. + return true; + } + } + + if (const CastInst *CI = dyn_cast<CastInst>(I)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI)) + return true; + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). + if (TD && isa<TruncInst>(CI) && + TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) + return true; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa<CmpInst>(CI->getOperand(0))) + return true; + } + + return false; +} + /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, @@ -58,27 +104,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (isa<PHINode>(II)) continue; // PHI nodes don't count. + if (isInstructionFree(II, TD)) + continue; // Special handling for calls. if (isa<CallInst>(II) || isa<InvokeInst>(II)) { - if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) { - switch (IntrinsicI->getIntrinsicID()) { - default: break; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - continue; - } - } - ImmutableCallSite CS(cast<Instruction>(II)); if (const Function *F = CS.getCalledFunction()) { @@ -115,28 +145,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy()) ++NumVectorInsts; - if (const CastInst *CI = dyn_cast<CastInst>(II)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || - isa<PtrToIntInst>(CI)) - continue; - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). - if (isa<TruncInst>(CI) && TD && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - continue; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa<CmpInst>(CI->getOperand(0))) - continue; - } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){ - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (GEPI->hasAllConstantIndices()) - continue; - } - ++NumInsts; } diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index dedbfebea7..bc6c1687fd 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -11,659 +11,1014 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "inline-cost" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstVisitor.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/CallingConv.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" +#include "llvm/GlobalAlias.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/SmallPtrSet.h" using namespace llvm; -unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant( - const CodeMetrics &Metrics, Value *V) { - unsigned Reduction = 0; - SmallVector<Value *, 4> Worklist; - Worklist.push_back(V); - do { - Value *V = Worklist.pop_back_val(); - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - User *U = *UI; - if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { - // We will be able to eliminate all but one of the successors. - const TerminatorInst &TI = cast<TerminatorInst>(*U); - const unsigned NumSucc = TI.getNumSuccessors(); - unsigned Instrs = 0; - for (unsigned I = 0; I != NumSucc; ++I) - Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I)); - // We don't know which blocks will be eliminated, so use the average size. - Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; - continue; - } +namespace { + +class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> { + typedef InstVisitor<CallAnalyzer, bool> Base; + friend class InstVisitor<CallAnalyzer, bool>; + + // TargetData if available, or null. + const TargetData *const TD; + + // The called function. + Function &F; + + int Threshold; + int Cost; + const bool AlwaysInline; + + bool IsRecursive; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap<Value *, Constant *> SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap<Value *, Value *> SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap<Value *, int> SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs; + + // Custom simplification helper routines. + bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap<Value *, int>::iterator &CostIt); + void disableSROA(DenseMap<Value *, int>::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap<Value *, int>::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. + void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. + bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitICmp(ICmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitCallSite(CallSite CS); + +public: + CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold) + : TD(TD), F(Callee), Threshold(Threshold), Cost(0), + AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)), + IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), + NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { + } - // Figure out if this instruction will be removed due to simple constant - // propagation. - Instruction &Inst = cast<Instruction>(*U); - - // We can't constant propagate instructions which have effects or - // read memory. - // - // FIXME: It would be nice to capture the fact that a load from a - // pointer-to-constant-global is actually a *really* good thing to zap. - // Unfortunately, we don't know the pointer that may get propagated here, - // so we can't make this decision. - if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || - isa<AllocaInst>(Inst)) - continue; + bool analyzeCall(CallSite CS); - bool AllOperandsConstant = true; - for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) - if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { - AllOperandsConstant = false; - break; - } - if (!AllOperandsConstant) - continue; + int getThreshold() { return Threshold; } + int getCost() { return Cost; } - // We will get to remove this instruction... - Reduction += InlineConstants::InstrCost; + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. + unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; - // And any other instructions that use it which become constants - // themselves. - Worklist.push_back(&Inst); - } - } while (!Worklist.empty()); - return Reduction; -} + void dump(); +}; -static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics, - ICmpInst *ICI) { - unsigned Reduction = 0; +} // namespace - // Bail if this is comparing against a non-constant; there is nothing we can - // do there. - if (!isa<Constant>(ICI->getOperand(1))) - return Reduction; +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); +} - // An icmp pred (alloca, C) becomes true if the predicate is true when - // equal and false otherwise. - bool Result = ICI->isTrueWhenEqual(); +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; - SmallVector<Instruction *, 4> Worklist; - Worklist.push_back(ICI); - do { - Instruction *U = Worklist.pop_back_val(); - Reduction += InlineConstants::InstrCost; - for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); - UI != UE; ++UI) { - Instruction *I = dyn_cast<Instruction>(*UI); - if (!I || I->mayHaveSideEffects()) continue; - if (I->getNumOperands() == 1) - Worklist.push_back(I); - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { - // If BO produces the same value as U, then the other operand is - // irrelevant and we can put it into the Worklist to continue - // deleting dead instructions. If BO produces the same value as the - // other operand, we can delete BO but that's it. - if (Result == true) { - if (BO->getOpcode() == Instruction::Or) - Worklist.push_back(I); - if (BO->getOpcode() == Instruction::And) - Reduction += InlineConstants::InstrCost; - } else { - if (BO->getOpcode() == Instruction::Or || - BO->getOpcode() == Instruction::Xor) - Reduction += InlineConstants::InstrCost; - if (BO->getOpcode() == Instruction::And) - Worklist.push_back(I); - } - } - if (BranchInst *BI = dyn_cast<BranchInst>(I)) { - BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1); - if (BB->getSinglePredecessor()) - Reduction - += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB); - } - } - } while (!Worklist.empty()); + DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; - return Reduction; + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); } -/// \brief Compute the reduction possible for a given instruction if we are able -/// to SROA an alloca. +/// \brief Disable SROA for the candidate marked by this cost iterator. /// -/// The reduction for this instruction is added to the SROAReduction output -/// parameter. Returns false if this instruction is expected to defeat SROA in -/// general. -static bool countCodeReductionForSROAInst(Instruction *I, - SmallVectorImpl<Value *> &Worklist, - unsigned &SROAReduction) { - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - if (!LI->isSimple()) - return false; - SROAReduction += InlineConstants::InstrCost; +/// This markes the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} + +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. +void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} + +/// \brief Accumulate the given cost for a particular SROA candidate. +void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap<Value *, int>::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); return true; } - if (StoreInst *SI = dyn_cast<StoreInst>(I)) { - if (!SI->isSimple()) + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I)) return false; - SROAReduction += InlineConstants::InstrCost; - return true; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If the GEP has variable indices, we won't be able to do much with it. - if (!GEP->hasAllConstantIndices()) + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast<ConstantInt>(SimpleOp); + if (!OpC) return false; - // A non-zero GEP will likely become a mask operation after SROA. - if (GEP->hasAllZeroIndices()) - SROAReduction += InlineConstants::InstrCost; - Worklist.push_back(GEP); - return true; - } + if (OpC->isZero()) continue; - if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { - // Track pointer through bitcasts. - Worklist.push_back(BCI); - SROAReduction += InlineConstants::InstrCost; - return true; + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; } + return true; +} + +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. - // We just look for non-constant operands to ICmp instructions as those will - // defeat SROA. The actual reduction for these happens even without SROA. - if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) - return isa<Constant>(ICI->getOperand(1)); - - if (SelectInst *SI = dyn_cast<SelectInst>(I)) { - // SROA can handle a select of alloca iff all uses of the alloca are - // loads, and dereferenceable. We assume it's dereferenceable since - // we're told the input is an alloca. - for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end(); - UI != UE; ++UI) { - LoadInst *LI = dyn_cast<LoadInst>(*UI); - if (LI == 0 || !LI->isSimple()) + // We will happily inline tatic alloca instructions or dynamic alloca + // instructions in always-inline situations. + if (AlwaysInline || I.isStaticAlloca()) + return Base::visitAlloca(I); + + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; +} + +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. + + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. + + // Phi nodes are always zero-cost. + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap<Value *, int>::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. + Value *Ptr = I.getPointerOperand(); + std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); return false; + } |