13 files changed, 1189 insertions, 798 deletions
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
index 033e19bc7f..7116078349 100644
--- a/include/llvm/Analysis/CodeMetrics.h
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -20,9 +20,13 @@
 namespace llvm {
   class BasicBlock;
   class Function;
+  class Instruction;
   class TargetData;
   class Value;
 
+  /// \brief Check whether an instruction is likely to be "free" when lowered.
+  bool isInstructionFree(const Instruction *I, const TargetData *TD = 0);
+
   /// \brief Check whether a call will lower to something small.
   ///
   /// This tests checks whether calls to this function will lower to something
diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h
index c804c46528..c523890472 100644
--- a/include/llvm/Analysis/InlineCost.h
+++ b/include/llvm/Analysis/InlineCost.h
@@ -16,6 +16,7 @@
 
 #include "llvm/Function.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/ValueMap.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include <cassert>
@@ -25,162 +26,105 @@
 namespace llvm {
 
   class CallSite;
-  template<class PtrType, unsigned SmallSize>
-  class SmallPtrSet;
   class TargetData;
 
   namespace InlineConstants {
     // Various magic constants used to adjust heuristics.
     const int InstrCost = 5;
-    const int IndirectCallBonus = -100;
+    const int IndirectCallThreshold = 100;
     const int CallPenalty = 25;
     const int LastCallToStaticBonus = -15000;
     const int ColdccPenalty = 2000;
     const int NoreturnPenalty = 10000;
   }
 
-  /// InlineCost - Represent the cost of inlining a function. This
-  /// supports special values for functions which should "always" or
-  /// "never" be inlined. Otherwise, the cost represents a unitless
-  /// amount; smaller values increase the likelihood of the function
-  /// being inlined.
+  /// \brief Represents the cost of inlining a function.
+  ///
+  /// This supports special values for functions which should "always" or
+  /// "never" be inlined. Otherwise, the cost represents a unitless amount;
+  /// smaller values increase the likelihood of the function being inlined.
+  ///
+  /// Objects of this type also provide the adjusted threshold for inlining
+  /// based on the information available for a particular callsite. They can be
+  /// directly tested to determine if inlining should occur given the cost and
+  /// threshold for this cost metric.
   class InlineCost {
-    enum Kind {
-      Value,
-      Always,
-      Never
+    enum CostKind {
+      CK_Variable,
+      CK_Always,
+      CK_Never
     };
 
-    // This is a do-it-yourself implementation of
-    //   int Cost : 30;
-    //   unsigned Type : 2;
-    // We used to use bitfields, but they were sometimes miscompiled (PR3822).
-    enum { TYPE_BITS = 2 };
-    enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS };
-    unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS;
+    const int      Cost : 30; // The inlining cost if neither always nor never.
+    const unsigned Kind : 2;  // The type of cost, one of CostKind above.
 
-    Kind getType() const {
-      return Kind(TypedCost >> COST_BITS);
-    }
+    /// \brief The adjusted threshold against which this cost should be tested.
+    const int Threshold;
 
-    int getCost() const {
-      // Sign-extend the bottom COST_BITS bits.
-      return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS;
+    // Trivial constructor, interesting logic in the factory functions below.
+    InlineCost(int Cost, CostKind Kind, int Threshold)
+      : Cost(Cost), Kind(Kind), Threshold(Threshold) {}
+
+  public:
+    static InlineCost get(int Cost, int Threshold) {
+      InlineCost Result(Cost, CK_Variable, Threshold);
+      assert(Result.Cost == Cost && "Cost exceeds InlineCost precision");
+      return Result;
+    }
+    static InlineCost getAlways() {
+      return InlineCost(0, CK_Always, 0);
+    }
+    static InlineCost getNever() {
+      return InlineCost(0, CK_Never, 0);
     }
 
-    InlineCost(int C, int T) {
-      TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS);
-      assert(getCost() == C && "Cost exceeds InlineCost precision");
+    /// \brief Test whether the inline cost is low enough for inlining.
+    operator bool() const {
+      if (isAlways()) return true;
+      if (isNever()) return false;
+      return Cost < Threshold;
     }
-  public:
-    static InlineCost get(int Cost) { return InlineCost(Cost, Value); }
-    static InlineCost getAlways() { return InlineCost(0, Always); }
-    static InlineCost getNever() { return InlineCost(0, Never); }
 
-    bool isVariable() const { return getType() == Value; }
-    bool isAlways() const { return getType() == Always; }
-    bool isNever() const { return getType() == Never; }
+    bool isVariable() const { return Kind == CK_Variable; }
+    bool isAlways() const   { return Kind == CK_Always; }
+    bool isNever() const    { return Kind == CK_Never; }
 
-    /// getValue() - Return a "variable" inline cost's amount. It is
+    /// getCost() - Return a "variable" inline cost's amount. It is
     /// an error to call this on an "always" or "never" InlineCost.
-    int getValue() const {
-      assert(getType() == Value && "Invalid access of InlineCost");
-      return getCost();
+    int getCost() const {
+      assert(Kind == CK_Variable && "Invalid access of InlineCost");
+      return Cost;
+    }
+
+    /// \brief Get the cost delta from the threshold for inlining.
+    /// Only valid if the cost is of the variable kind. Returns a negative
+    /// value if the cost is too high to inline.
+    int getCostDelta() const {
+      return Threshold - getCost();
     }
   };
 
   /// InlineCostAnalyzer - Cost analyzer used by inliner.
   class InlineCostAnalyzer {
-    struct ArgInfo {
-    public:
-      unsigned ConstantWeight;
-      unsigned AllocaWeight;
-
-      ArgInfo(unsigned CWeight, unsigned AWeight)
-        : ConstantWeight(CWeight), AllocaWeight(AWeight)
-          {}
-    };
-
-    struct FunctionInfo {
-      CodeMetrics Metrics;
-
-      /// ArgumentWeights - Each formal argument of the function is inspected to
-      /// see if it is used in any contexts where making it a constant or alloca
-      /// would reduce the code size.  If so, we add some value to the argument
-      /// entry here.
-      std::vector<ArgInfo> ArgumentWeights;
-
-      /// PointerArgPairWeights - Weights to use when giving an inline bonus to
-      /// a call site due to correlated pairs of pointers.
-      DenseMap<std::pair<unsigned, unsigned>, unsigned> PointerArgPairWeights;
-
-      /// countCodeReductionForConstant - Figure out an approximation for how
-      /// many instructions will be constant folded if the specified value is
-      /// constant.
-      unsigned countCodeReductionForConstant(const CodeMetrics &Metrics,
-                                             Value *V);
-
-      /// countCodeReductionForAlloca - Figure out an approximation of how much
-      /// smaller the function will be if it is inlined into a context where an
-      /// argument becomes an alloca.
-      unsigned countCodeReductionForAlloca(const CodeMetrics &Metrics,
-                                           Value *V);
-
-      /// countCodeReductionForPointerPair - Count the bonus to apply to an
-      /// inline call site where a pair of arguments are pointers and one
-      /// argument is a constant offset from the other. The idea is to
-      /// recognize a common C++ idiom where a begin and end iterator are
-      /// actually pointers, and many operations on the pair of them will be
-      /// constants if the function is called with arguments that have
-      /// a constant offset.
-      void countCodeReductionForPointerPair(
-          const CodeMetrics &Metrics,
-          DenseMap<Value *, unsigned> &PointerArgs,
-          Value *V, unsigned ArgIdx);
-
-      /// analyzeFunction - Add information about the specified function
-      /// to the current structure.
-      void analyzeFunction(Function *F, const TargetData *TD);
-
-      /// NeverInline - Returns true if the function should never be
-      /// inlined into any caller.
-      bool NeverInline();
-    };
-
-    // The Function* for a function can be changed (by ArgumentPromotion);
-    // the ValueMap will update itself when this happens.
-    ValueMap<const Function *, FunctionInfo> CachedFunctionInfo;
-
     // TargetData if available, or null.
     const TargetData *TD;
 
-    int CountBonusForConstant(Value *V, Constant *C = NULL);
-    int ConstantFunctionBonus(CallSite CS, Constant *C);
-    int getInlineSize(CallSite CS, Function *Callee);
-    int getInlineBonuses(CallSite CS, Function *Callee);
   public:
     InlineCostAnalyzer(): TD(0) {}
 
     void setTargetData(const TargetData *TData) { TD = TData; }
 
-    /// getInlineCost - The heuristic used to determine if we should inline the
-    /// function call or not.
+    /// \brief Get an InlineCost object representing the cost of inlining this
+    /// callsite.
     ///
-    InlineCost getInlineCost(CallSite CS);
-    /// getCalledFunction - The heuristic used to determine if we should inline
-    /// the function call or not.  The callee is explicitly specified, to allow
-    /// you to calculate the cost of inlining a function via a pointer.  The
-    /// result assumes that the inlined version will always be used.  You should
-    /// weight it yourself in cases where this callee will not always be called.
-    InlineCost getInlineCost(CallSite CS, Function *Callee);
-
-    /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
-    /// higher threshold to determine if the function call should be inlined.
-    float getInlineFudgeFactor(CallSite CS);
+    /// Note that threshold is passed into this function. Only costs below the
+    /// threshold are computed with any accuracy. The threshold can be used to
+    /// bound the computation necessary to determine whether the cost is
+    /// sufficiently low to warrant inlining.
+    InlineCost getInlineCost(CallSite CS, int Threshold);
 
     /// resetCachedFunctionInfo - erase any cached cost info for this function.
     void resetCachedCostInfo(Function* Caller) {
-      CachedFunctionInfo[Caller] = FunctionInfo();
     }
 
     /// growCachedCostInfo - update the cached cost info for Caller after Callee
diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h
index f59479d738..bdc02fff73 100644
--- a/include/llvm/Transforms/IPO/InlinerPass.h
+++ b/include/llvm/Transforms/IPO/InlinerPass.h
@@ -65,11 +65,6 @@ struct Inliner : public CallGraphSCCPass {
   ///
   virtual InlineCost getInlineCost(CallSite CS) = 0;
 
-  // getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
-  // higher threshold to determine if the function call should be inlined.
-  ///
-  virtual float getInlineFudgeFactor(CallSite CS) = 0;
-
   /// resetCachedCostInfo - erase any cached cost data from the derived class.
   /// If the derived class has no such data this can be empty.
   /// 
diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp
index 6c93f78629..316e7bc934 100644
--- a/lib/Analysis/CodeMetrics.cpp
+++ b/lib/Analysis/CodeMetrics.cpp
@@ -50,6 +50,52 @@ bool llvm::callIsSmall(const Function *F) {
   return false;
 }
 
+bool llvm::isInstructionFree(const Instruction *I, const TargetData *TD) {
+  if (isa<PHINode>(I))
+    return true;
+
+  // If a GEP has all constant indices, it will probably be folded with
+  // a load/store.
+  if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
+    return GEP->hasAllConstantIndices();
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::dbg_declare:
+    case Intrinsic::dbg_value:
+    case Intrinsic::invariant_start:
+    case Intrinsic::invariant_end:
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::objectsize:
+    case Intrinsic::ptr_annotation:
+    case Intrinsic::var_annotation:
+      // These intrinsics don't count as size.
+      return true;
+    }
+  }
+
+  if (const CastInst *CI = dyn_cast<CastInst>(I)) {
+    // Noop casts, including ptr <-> int,  don't count.
+    if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) || isa<PtrToIntInst>(CI))
+      return true;
+    // trunc to a native type is free (assuming the target has compare and
+    // shift-right of the same width).
+    if (TD && isa<TruncInst>(CI) &&
+        TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
+      return true;
+    // Result of a cmp instruction is often extended (to be used by other
+    // cmp instructions, logical or return instructions). These are usually
+    // nop on most sane targets.
+    if (isa<CmpInst>(CI->getOperand(0)))
+      return true;
+  }
+
+  return false;
+}
+
 /// analyzeBasicBlock - Fill in the current structure with information gleaned
 /// from the specified block.
 void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
@@ -58,27 +104,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
   unsigned NumInstsBeforeThisBB = NumInsts;
   for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
        II != E; ++II) {
-    if (isa<PHINode>(II)) continue;           // PHI nodes don't count.
+    if (isInstructionFree(II, TD))
+      continue;
 
     // Special handling for calls.
     if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
-      if (const IntrinsicInst *IntrinsicI = dyn_cast<IntrinsicInst>(II)) {
-        switch (IntrinsicI->getIntrinsicID()) {
-        default: break;
-        case Intrinsic::dbg_declare:
-        case Intrinsic::dbg_value:
-        case Intrinsic::invariant_start:
-        case Intrinsic::invariant_end:
-        case Intrinsic::lifetime_start:
-        case Intrinsic::lifetime_end:
-        case Intrinsic::objectsize:
-        case Intrinsic::ptr_annotation:
-        case Intrinsic::var_annotation:
-          // These intrinsics don't count as size.
-          continue;
-        }
-      }
-
       ImmutableCallSite CS(cast<Instruction>(II));
 
       if (const Function *F = CS.getCalledFunction()) {
@@ -115,28 +145,6 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
     if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
       ++NumVectorInsts;
 
-    if (const CastInst *CI = dyn_cast<CastInst>(II)) {
-      // Noop casts, including ptr <-> int,  don't count.
-      if (CI->isLosslessCast() || isa<IntToPtrInst>(CI) ||
-          isa<PtrToIntInst>(CI))
-        continue;
-      // trunc to a native type is free (assuming the target has compare and
-      // shift-right of the same width).
-      if (isa<TruncInst>(CI) && TD &&
-          TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType())))
-        continue;
-      // Result of a cmp instruction is often extended (to be used by other
-      // cmp instructions, logical or return instructions). These are usually
-      // nop on most sane targets.
-      if (isa<CmpInst>(CI->getOperand(0)))
-        continue;
-    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(II)){
-      // If a GEP has all constant indices, it will probably be folded with
-      // a load/store.
-      if (GEPI->hasAllConstantIndices())
-        continue;
-    }
-
     ++NumInsts;
   }
 
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index dedbfebea7..bc6c1687fd 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -11,659 +11,1014 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "inline-cost"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/CallingConv.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
+#include "llvm/GlobalAlias.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 
 using namespace llvm;
 
-unsigned InlineCostAnalyzer::FunctionInfo::countCodeReductionForConstant(
-    const CodeMetrics &Metrics, Value *V) {
-  unsigned Reduction = 0;
-  SmallVector<Value *, 4> Worklist;
-  Worklist.push_back(V);
-  do {
-    Value *V = Worklist.pop_back_val();
-    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
-      User *U = *UI;
-      if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
-        // We will be able to eliminate all but one of the successors.
-        const TerminatorInst &TI = cast<TerminatorInst>(*U);
-        const unsigned NumSucc = TI.getNumSuccessors();
-        unsigned Instrs = 0;
-        for (unsigned I = 0; I != NumSucc; ++I)
-          Instrs += Metrics.NumBBInsts.lookup(TI.getSuccessor(I));
-        // We don't know which blocks will be eliminated, so use the average size.
-        Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc;
-        continue;
-      }
+namespace {
+
+class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
+  typedef InstVisitor<CallAnalyzer, bool> Base;
+  friend class InstVisitor<CallAnalyzer, bool>;
+
+  // TargetData if available, or null.
+  const TargetData *const TD;
+
+  // The called function.
+  Function &F;
+
+  int Threshold;
+  int Cost;
+  const bool AlwaysInline;
+
+  bool IsRecursive;
+  bool ExposesReturnsTwice;
+  bool HasDynamicAlloca;
+  unsigned NumInstructions, NumVectorInstructions;
+  int FiftyPercentVectorBonus, TenPercentVectorBonus;
+  int VectorBonus;
+
+  // While we walk the potentially-inlined instructions, we build up and
+  // maintain a mapping of simplified values specific to this callsite. The
+  // idea is to propagate any special information we have about arguments to
+  // this call through the inlinable section of the function, and account for
+  // likely simplifications post-inlining. The most important aspect we track
+  // is CFG altering simplifications -- when we prove a basic block dead, that
+  // can cause dramatic shifts in the cost of inlining a function.
+  DenseMap<Value *, Constant *> SimplifiedValues;
+
+  // Keep track of the values which map back (through function arguments) to
+  // allocas on the caller stack which could be simplified through SROA.
+  DenseMap<Value *, Value *> SROAArgValues;
+
+  // The mapping of caller Alloca values to their accumulated cost savings. If
+  // we have to disable SROA for one of the allocas, this tells us how much
+  // cost must be added.
+  DenseMap<Value *, int> SROAArgCosts;
+
+  // Keep track of values which map to a pointer base and constant offset.
+  DenseMap<Value *, std::pair<Value *, APInt> > ConstantOffsetPtrs;
+
+  // Custom simplification helper routines.
+  bool isAllocaDerivedArg(Value *V);
+  bool lookupSROAArgAndCost(Value *V, Value *&Arg,
+                            DenseMap<Value *, int>::iterator &CostIt);
+  void disableSROA(DenseMap<Value *, int>::iterator CostIt);
+  void disableSROA(Value *V);
+  void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+                          int InstructionCost);
+  bool handleSROACandidate(bool IsSROAValid,
+                           DenseMap<Value *, int>::iterator CostIt,
+                           int InstructionCost);
+  bool isGEPOffsetConstant(GetElementPtrInst &GEP);
+  bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
+  ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
+
+  // Custom analysis routines.
+  bool analyzeBlock(BasicBlock *BB);
+
+  // Disable several entry points to the visitor so we don't accidentally use
+  // them by declaring but not defining them here.
+  void visit(Module *);     void visit(Module &);
+  void visit(Function *);   void visit(Function &);
+  void visit(BasicBlock *); void visit(BasicBlock &);
+
+  // Provide base case for our instruction visit.
+  bool visitInstruction(Instruction &I);
+
+  // Our visit overrides.
+  bool visitAlloca(AllocaInst &I);
+  bool visitPHI(PHINode &I);
+  bool visitGetElementPtr(GetElementPtrInst &I);
+  bool visitBitCast(BitCastInst &I);
+  bool visitPtrToInt(PtrToIntInst &I);
+  bool visitIntToPtr(IntToPtrInst &I);
+  bool visitCastInst(CastInst &I);
+  bool visitUnaryInstruction(UnaryInstruction &I);
+  bool visitICmp(ICmpInst &I);
+  bool visitSub(BinaryOperator &I);
+  bool visitBinaryOperator(BinaryOperator &I);
+  bool visitLoad(LoadInst &I);
+  bool visitStore(StoreInst &I);
+  bool visitCallSite(CallSite CS);
+
+public:
+  CallAnalyzer(const TargetData *TD, Function &Callee, int Threshold)
+    : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
+      AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
+      IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
+      NumInstructions(0), NumVectorInstructions(0),
+      FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
+      NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
+      NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
+      NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) {
+  }
 
-      // Figure out if this instruction will be removed due to simple constant
-      // propagation.
-      Instruction &Inst = cast<Instruction>(*U);
-
-      // We can't constant propagate instructions which have effects or
-      // read memory.
-      //
-      // FIXME: It would be nice to capture the fact that a load from a
-      // pointer-to-constant-global is actually a *really* good thing to zap.
-      // Unfortunately, we don't know the pointer that may get propagated here,
-      // so we can't make this decision.
-      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
-          isa<AllocaInst>(Inst))
-        continue;
+  bool analyzeCall(CallSite CS);
 
-      bool AllOperandsConstant = true;
-      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
-        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
-          AllOperandsConstant = false;
-          break;
-        }
-      if (!AllOperandsConstant)
-        continue;
+  int getThreshold() { return Threshold; }
+  int getCost() { return Cost; }
 
-      // We will get to remove this instruction...
-      Reduction += InlineConstants::InstrCost;
+  // Keep a bunch of stats about the cost savings found so we can print them
+  // out when debugging.
+  unsigned NumConstantArgs;
+  unsigned NumConstantOffsetPtrArgs;
+  unsigned NumAllocaArgs;
+  unsigned NumConstantPtrCmps;
+  unsigned NumConstantPtrDiffs;
+  unsigned NumInstructionsSimplified;
+  unsigned SROACostSavings;
+  unsigned SROACostSavingsLost;
 
-      // And any other instructions that use it which become constants
-      // themselves.
-      Worklist.push_back(&Inst);
-    }
-  } while (!Worklist.empty());
-  return Reduction;
-}
+  void dump();
+};
 
-static unsigned countCodeReductionForAllocaICmp(const CodeMetrics &Metrics,
-                                                ICmpInst *ICI) {
-  unsigned Reduction = 0;
+} // namespace
 
-  // Bail if this is comparing against a non-constant; there is nothing we can
-  // do there.
-  if (!isa<Constant>(ICI->getOperand(1)))
-    return Reduction;
+/// \brief Test whether the given value is an Alloca-derived function argument.
+bool CallAnalyzer::isAllocaDerivedArg(Value *V) {
+  return SROAArgValues.count(V);
+}
 
-  // An icmp pred (alloca, C) becomes true if the predicate is true when
-  // equal and false otherwise.
-  bool Result = ICI->isTrueWhenEqual();
+/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to.
+/// Returns false if V does not map to a SROA-candidate.
+bool CallAnalyzer::lookupSROAArgAndCost(
+    Value *V, Value *&Arg, DenseMap<Value *, int>::iterator &CostIt) {
+  if (SROAArgValues.empty() || SROAArgCosts.empty())
+    return false;
 
-  SmallVector<Instruction *, 4> Worklist;
-  Worklist.push_back(ICI);
-  do {
-    Instruction *U = Worklist.pop_back_val();
-    Reduction += InlineConstants::InstrCost;
-    for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
-         UI != UE; ++UI) {
-      Instruction *I = dyn_cast<Instruction>(*UI);
-      if (!I || I->mayHaveSideEffects()) continue;
-      if (I->getNumOperands() == 1)
-        Worklist.push_back(I);
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
-        // If BO produces the same value as U, then the other operand is
-        // irrelevant and we can put it into the Worklist to continue
-        // deleting dead instructions. If BO produces the same value as the
-        // other operand, we can delete BO but that's it.
-        if (Result == true) {
-          if (BO->getOpcode() == Instruction::Or)
-            Worklist.push_back(I);
-          if (BO->getOpcode() == Instruction::And)
-            Reduction += InlineConstants::InstrCost;
-        } else {
-          if (BO->getOpcode() == Instruction::Or ||
-              BO->getOpcode() == Instruction::Xor)
-            Reduction += InlineConstants::InstrCost;
-          if (BO->getOpcode() == Instruction::And)
-            Worklist.push_back(I);
-        }
-      }
-      if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
-        BasicBlock *BB = BI->getSuccessor(Result ? 0 : 1);
-        if (BB->getSinglePredecessor())
-          Reduction
-            += InlineConstants::InstrCost * Metrics.NumBBInsts.lookup(BB);
-      }
-    }
-  } while (!Worklist.empty());
+  DenseMap<Value *, Value *>::iterator ArgIt = SROAArgValues.find(V);
+  if (ArgIt == SROAArgValues.end())
+    return false;
 
-  return Reduction;
+  Arg = ArgIt->second;
+  CostIt = SROAArgCosts.find(Arg);
+  return CostIt != SROAArgCosts.end();
 }
 
-/// \brief Compute the reduction possible for a given instruction if we are able
-/// to SROA an alloca.
+/// \brief Disable SROA for the candidate marked by this cost iterator.
 ///
-/// The reduction for this instruction is added to the SROAReduction output
-/// parameter. Returns false if this instruction is expected to defeat SROA in
-/// general.
-static bool countCodeReductionForSROAInst(Instruction *I,
-                                          SmallVectorImpl<Value *> &Worklist,
-                                          unsigned &SROAReduction) {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
-    if (!LI->isSimple())
-      return false;
-    SROAReduction += InlineConstants::InstrCost;
+/// This marks the candidate as no longer viable for SROA, and adds the cost
+/// savings associated with it back into the inline cost measurement.
+void CallAnalyzer::disableSROA(DenseMap<Value *, int>::iterator CostIt) {
+  // If we're no longer able to perform SROA we need to undo its cost savings
+  // and prevent subsequent analysis.
+  Cost += CostIt->second;
+  SROACostSavings -= CostIt->second;
+  SROACostSavingsLost += CostIt->second;
+  SROAArgCosts.erase(CostIt);
+}
+
+/// \brief If 'V' maps to a SROA candidate, disable SROA for it.
+void CallAnalyzer::disableSROA(Value *V) {
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+    disableSROA(CostIt);
+}
+
+/// \brief Accumulate the given cost for a particular SROA candidate.
+void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
+                                      int InstructionCost) {
+  CostIt->second += InstructionCost;
+  SROACostSavings += InstructionCost;
+}
+
+/// \brief Helper for the common pattern of handling a SROA candidate.
+/// Either accumulates the cost savings if the SROA remains valid, or disables
+/// SROA for the candidate.
+bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
+                                       DenseMap<Value *, int>::iterator CostIt,
+                                       int InstructionCost) {
+  if (IsSROAValid) {
+    accumulateSROACost(CostIt, InstructionCost);
     return true;
   }
 
-  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
-    if (!SI->isSimple())
+  disableSROA(CostIt);
+  return false;
+}
+
+/// \brief Check whether a GEP's indices are all constant.
+///
+/// Respects any simplified values known during the analysis of this callsite.
+bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) {
+  for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I)
+    if (!isa<Constant>(*I) && !SimplifiedValues.lookup(*I))
       return false;
-    SROAReduction += InlineConstants::InstrCost;
-    return true;
-  }
 
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
-    // If the GEP has variable indices, we won't be able to do much with it.
-    if (!GEP->hasAllConstantIndices())
+  return true;
+}
+
+/// \brief Accumulate a constant GEP offset into an APInt if possible.
+///
+/// Returns false if unable to compute the offset for any reason. Respects any
+/// simplified values known during the analysis of this callsite.
+bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
+  if (!TD)
+    return false;
+
+  unsigned IntPtrWidth = TD->getPointerSizeInBits();
+  assert(IntPtrWidth == Offset.getBitWidth());
+
+  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
+       GTI != GTE; ++GTI) {
+    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
+    if (!OpC)
+      if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand()))
+        OpC = dyn_cast<ConstantInt>(SimpleOp);
+    if (!OpC)
       return false;
-    // A non-zero GEP will likely become a mask operation after SROA.
-    if (GEP->hasAllZeroIndices())
-      SROAReduction += InlineConstants::InstrCost;
-    Worklist.push_back(GEP);
-    return true;
-  }
+    if (OpC->isZero()) continue;
 
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
-    // Track pointer through bitcasts.
-    Worklist.push_back(BCI);
-    SROAReduction += InlineConstants::InstrCost;
-    return true;
+    // Handle a struct index, which adds its field offset to the pointer.
+    if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+      unsigned ElementIdx = OpC->getZExtValue();
+      const StructLayout *SL = TD->getStructLayout(STy);
+      Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
+      continue;
+    }
+
+    APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType()));
+    Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize;
   }
+  return true;
+}
+
+bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+  // FIXME: Check whether inlining will turn a dynamic alloca into a static
+  // alloca, and handle that case.
 
-  // We just look for non-constant operands to ICmp instructions as those will
-  // defeat SROA. The actual reduction for these happens even without SROA.
-  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
-    return isa<Constant>(ICI->getOperand(1));
-
-  if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
-    // SROA can handle a select of alloca iff all uses of the alloca are
-    // loads, and dereferenceable. We assume it's dereferenceable since
-    // we're told the input is an alloca.
-    for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
-         UI != UE; ++UI) {
-      LoadInst *LI = dyn_cast<LoadInst>(*UI);
-      if (LI == 0 || !LI->isSimple())
+  // We will happily inline static alloca instructions or dynamic alloca
+  // instructions in always-inline situations.
+  if (AlwaysInline || I.isStaticAlloca())
+    return Base::visitAlloca(I);
+
+  // FIXME: This is overly conservative. Dynamic allocas are inefficient for
+  // a variety of reasons, and so we would like to not inline them into
+  // functions which don't currently have a dynamic alloca. This simply
+  // disables inlining altogether in the presence of a dynamic alloca.
+  HasDynamicAlloca = true;
+  return false;
+}
+
+bool CallAnalyzer::visitPHI(PHINode &I) {
+  // FIXME: We should potentially be tracking values through phi nodes,
+  // especially when they collapse to a single value due to deleted CFG edges
+  // during inlining.
+
+  // FIXME: We need to propagate SROA *disabling* through phi nodes, even
+  // though we don't want to propagate its bonuses. The idea is to disable
+  // SROA if it *might* be used in an inappropriate manner.
+
+  // Phi nodes are always zero-cost.
+  return true;
+}
+
+bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) {
+  Value *SROAArg;
+  DenseMap<Value *, int>::iterator CostIt;
+  bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(),
+                                            SROAArg, CostIt);
+
+  // Try to fold GEPs of constant-offset call site argument pointers. This
+  // requires target data and inbounds GEPs.
+  if (TD && I.isInBounds()) {
+    // Check if we have a base + offset for the pointer.
+    Value *Ptr = I.getPointerOperand();
+    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr);
+    if (BaseAndOffset.first) {
+      // Check if the offset of this GEP is constant, and if so accumulate it
+      // into Offset.
+      if (!accumulateGEPOffset(cast<GEPOperator>(I), BaseAndOffset.second)) {
+        // Non-constant GEPs aren't folded, and disable SROA.
+        if (SROACandidate)
+          disableSROA(CostIt);
         return false;
+      }