Diffstat (limited to 'lib/Transforms/Scalar')
33 files changed, 1527 insertions, 5038 deletions
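Much of the CodeGenPrepare churn in the diff below swaps the old monolithic Attributes queries for the index-based AttributeSet API and moves headers under llvm/IR/. A minimal sketch of the new query style, using the exact calls that appear in the CodeGenPrepare hunk (the wrapper function isOptimizedForSize is a hypothetical helper added here only for illustration, not part of the commit):

    // Illustrative only: querying a function attribute after this change.
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static bool isOptimizedForSize(const Function &F) {
      // Old API, removed by this commit:
      //   F.getFnAttributes().hasAttribute(Attributes::OptimizeForSize)
      // New API: attributes are looked up by slot index on the AttributeSet.
      return F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                            Attribute::OptimizeForSize);
    }

Return-value attributes are queried the same way with AttributeSet::ReturnIndex, as the DupRetToEnableTailCallOpts hunk shows.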
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index f43baf5a76..a097308640 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -20,9 +20,9 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/BasicBlock.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstIterator.h" diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp index 6214e3b703..e755008808 100644 --- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp +++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp @@ -30,7 +30,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include <set> diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b3fc6e338c..fd55e082ac 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,7 +21,6 @@ add_llvm_library(LLVMScalarOpts LoopUnswitch.cpp LowerAtomic.cpp MemCpyOptimizer.cpp - ObjCARC.cpp Reassociate.cpp Reg2Mem.cpp SCCP.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index e6abfdf581..015fd2e6e6 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -23,14 +23,14 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CommandLine.h" @@ -41,7 +41,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Transforms/Utils/AddrModeMatcher.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/Transforms/Utils/BypassSlowDivision.h" @@ -106,6 +105,8 @@ namespace { } bool runOnFunction(Function &F); + const char *getPassName() const { return "CodeGen Prepare"; } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<DominatorTree>(); AU.addPreserved<ProfileInfo>(); @@ -148,11 +149,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLInfo = &getAnalysis<TargetLibraryInfo>(); DT = getAnalysisIfAvailable<DominatorTree>(); PFI = getAnalysisIfAvailable<ProfileInfo>(); - OptSize = F.getFnAttributes().hasAttribute(Attributes::OptimizeForSize); + OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. 
- if (TLI && TLI->isSlowDivBypassed()) { + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { const DenseMap<unsigned int, unsigned int> &BypassWidths = TLI->getBypassSlowDivWidths(); for (Function::iterator I = F.begin(); I != F.end(); I++) @@ -727,9 +729,9 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // It's not safe to eliminate the sign / zero extension of the return value. // See llvm::isInTailCallPosition(). const Function *F = BB->getParent(); - Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (CallerRetAttr.hasAttribute(Attributes::ZExt) || - CallerRetAttr.hasAttribute(Attributes::SExt)) + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Make sure there are no instructions between the PHI and return, or that the @@ -786,11 +788,11 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // Conservatively require the attributes of the call to match those of the // return. Ignore noalias because it doesn't affect the call sequence. - Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes(); - if (AttrBuilder(CalleeRetAttr). - removeAttribute(Attributes::NoAlias) != - AttrBuilder(CallerRetAttr). - removeAttribute(Attributes::NoAlias)) + AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias)) continue; // Make sure the call instruction is followed by an unconditional branch to @@ -817,6 +819,629 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { // Memory Optimization //===----------------------------------------------------------------------===// +namespace { + +/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// which holds actual Value*'s for register values. +struct ExtAddrMode : public TargetLowering::AddrMode { + Value *BaseReg; + Value *ScaledReg; + ExtAddrMode() : BaseReg(0), ScaledReg(0) {} + void print(raw_ostream &OS) const; + void dump() const; + + bool operator==(const ExtAddrMode& O) const { + return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && + (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && + (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + } +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { + AM.print(OS); + return OS; +} + +void ExtAddrMode::print(raw_ostream &OS) const { + bool NeedPlus = false; + OS << "["; + if (BaseGV) { + OS << (NeedPlus ? " + " : "") + << "GV:"; + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); + NeedPlus = true; + } + + if (BaseOffs) + OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + + if (BaseReg) { + OS << (NeedPlus ? " + " : "") + << "Base:"; + WriteAsOperand(OS, BaseReg, /*PrintType=*/false); + NeedPlus = true; + } + if (Scale) { + OS << (NeedPlus ? " + " : "") + << Scale << "*"; + WriteAsOperand(OS, ScaledReg, /*PrintType=*/false); + NeedPlus = true; + } + + OS << ']'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ExtAddrMode::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + + +/// \brief A helper class for matching addressing modes. +/// +/// This encapsulates the logic for matching the target-legal addressing modes. 
+class AddressingModeMatcher { + SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetLowering &TLI; + + /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and + /// the memory instruction that we're computing this address for. + Type *AccessTy; + Instruction *MemoryInst; + + /// AddrMode - This is the addressing mode that we're building up. This is + /// part of the return value of this addressing mode matching stuff. + ExtAddrMode &AddrMode; + + /// IgnoreProfitability - This is set to true when we should not do + /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode + /// always returns true. + bool IgnoreProfitability; + + AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI, + const TargetLowering &T, Type *AT, + Instruction *MI, ExtAddrMode &AM) + : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) { + IgnoreProfitability = false; + } +public: + + /// Match - Find the maximal addressing mode that a load/store of V can fold, + /// give an access type of AccessTy. This returns a list of involved + /// instructions in AddrModeInsts. + static ExtAddrMode Match(Value *V, Type *AccessTy, + Instruction *MemoryInst, + SmallVectorImpl<Instruction*> &AddrModeInsts, + const TargetLowering &TLI) { + ExtAddrMode Result; + + bool Success = + AddressingModeMatcher(AddrModeInsts, TLI, AccessTy, + MemoryInst, Result).MatchAddr(V, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + return Result; + } +private: + bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool MatchAddr(Value *V, unsigned Depth); + bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth); + bool IsProfitableToFoldIntoAddressingMode(Instruction *I, + ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter); + bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); +}; + +/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. +/// Return true and update AddrMode if this addr mode is legal for the target, +/// false if not. +bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, + unsigned Depth) { + // If Scale is 1, then this is the same as adding ScaleReg to the addressing + // mode. Just process that directly. + if (Scale == 1) + return MatchAddr(ScaleReg, Depth); + + // If the scale is 0, it takes nothing to add this. + if (Scale == 0) + return true; + + // If we already have a scale of this value, we can add to it, otherwise, we + // need an available scale field. + if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) + return false; + + ExtAddrMode TestAddrMode = AddrMode; + + // Add scale to turn X*4+X*3 -> X*7. This could also do things like + // [A+B + A*7] -> [B+A*8]. + TestAddrMode.Scale += Scale; + TestAddrMode.ScaledReg = ScaleReg; + + // If the new address isn't legal, bail out. + if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) + return false; + + // It was legal, so commit it. + AddrMode = TestAddrMode; + + // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now + // to see if ScaleReg is actually X+C. If so, we can turn this into adding + // X*Scale + C*Scale to addr mode. + ConstantInt *CI = 0; Value *AddLHS = 0; + if (isa<Instruction>(ScaleReg) && // not a constant expr. 
+ match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { + TestAddrMode.ScaledReg = AddLHS; + TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; + + // If this addressing mode is legal, commit it and remember that we folded + // this instruction. + if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) { + AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); + AddrMode = TestAddrMode; + return true; + } + } + + // Otherwise, not (x+c)*scale, just return what we have. + return true; +} + +/// MightBeFoldableInst - This is a little filter, which returns true if an +/// addressing computation involving I might be folded into a load/store +/// accessing it. This doesn't need to be perfect, but needs to accept at least +/// the set of instructions that MatchOperationAddr can. +static bool MightBeFoldableInst(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::BitCast: + // Don't touch identity bitcasts. + if (I->getType() == I->getOperand(0)->getType()) + return false; + return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return true; + case Instruction::IntToPtr: + // We know the input is intptr_t, so this is foldable. + return true; + case Instruction::Add: + return true; + case Instruction::Mul: + case Instruction::Shl: + // Can only handle X*C and X << C. + return isa<ConstantInt>(I->getOperand(1)); + case Instruction::GetElementPtr: + return true; + default: + return false; + } +} + +/// MatchOperationAddr - Given an instruction or constant expr, see if we can +/// fold the operation into the addressing mode. If so, update the addressing +/// mode and return true, otherwise return false without modifying AddrMode. +bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, + unsigned Depth) { + // Avoid exponential behavior on extremely deep expression trees. + if (Depth >= 5) return false; + + switch (Opcode) { + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return MatchAddr(AddrInst->getOperand(0), Depth); + case Instruction::IntToPtr: + // This inttoptr is a no-op if the integer type is pointer sized. + if (TLI.getValueType(AddrInst->getOperand(0)->getType()) == + TLI.getPointerTy()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::BitCast: + // BitCast is always a noop, and we can handle it as long as it is + // int->int or pointer->pointer (we don't want int<->fp or something). + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && + // Don't touch identity bitcasts. These were probably put here by LSR, + // and we don't want to mess around with them. Assume it knows what it + // is doing. + AddrInst->getOperand(0)->getType() != AddrInst->getType()) + return MatchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::Add: { + // Check to see if we can merge in the RHS then the LHS. If so, we win. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + if (MatchAddr(AddrInst->getOperand(1), Depth+1) && + MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + + // Restore the old addr mode info. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + + // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. 
+ if (MatchAddr(AddrInst->getOperand(0), Depth+1) && + MatchAddr(AddrInst->getOperand(1), Depth+1)) + return true; + + // Otherwise we definitely can't merge the ADD in. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + break; + } + //case Instruction::Or: + // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. + //break; + case Instruction::Mul: + case Instruction::Shl: { + // Can only handle X*C and X << C. + ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); + if (!RHS) return false; + int64_t Scale = RHS->getSExtValue(); + if (Opcode == Instruction::Shl) + Scale = 1LL << Scale; + + return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + } + case Instruction::GetElementPtr: { + // Scan the GEP. We check it if it contains constant offsets and at most + // one variable offset. + int VariableOperand = -1; + unsigned VariableScale = 0; + + int64_t ConstantOffset = 0; + const DataLayout *TD = TLI.getDataLayout(); + gep_type_iterator GTI = gep_type_begin(AddrInst); + for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = TD->getStructLayout(STy); + unsigned Idx = + cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); + ConstantOffset += SL->getElementOffset(Idx); + } else { + uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { + ConstantOffset += CI->getSExtValue()*TypeSize; + } else if (TypeSize) { // Scales of zero don't do anything. + // We only allow one variable index at the moment. + if (VariableOperand != -1) + return false; + + // Remember the variable index. + VariableOperand = i; + VariableScale = TypeSize; + } + } + } + + // A common case is for the GEP to only do a constant offset. In this case, + // just add it to the disp field and check validity. + if (VariableOperand == -1) { + AddrMode.BaseOffs += ConstantOffset; + if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){ + // Check to see if we can fold the base pointer in too. + if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + } + AddrMode.BaseOffs -= ConstantOffset; + return false; + } + + // Save the valid addressing mode in case we can't match. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // See if the scale and offset amount is valid for this target. + AddrMode.BaseOffs += ConstantOffset; + + // Match the base operand of the GEP. + if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + // If it couldn't be matched, just stuff the value in a register. + if (AddrMode.HasBaseReg) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + } + + // Match the remaining variable portion of the GEP. + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + Depth)) { + // If it couldn't be matched, try stuffing the base into a register + // instead of matching it, and retrying the match of the scale. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + if (AddrMode.HasBaseReg) + return false; + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + AddrMode.BaseOffs += ConstantOffset; + if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + VariableScale, Depth)) { + // If even that didn't work, bail. 
+ AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + } + + return true; + } + } + return false; +} + +/// MatchAddr - If we can, try to add the value of 'Addr' into the current +/// addressing mode. If Addr can't be added to AddrMode this returns false and +/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type +/// or intptr_t for the target. +/// +bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { + // Fold in immediates if legal for the target. + AddrMode.BaseOffs += CI->getSExtValue(); + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.BaseOffs -= CI->getSExtValue(); + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { + // If this is a global variable, try to fold it into the addressing mode. + if (AddrMode.BaseGV == 0) { + AddrMode.BaseGV = GV; + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.BaseGV = 0; + } + } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // Check to see if it is possible to fold this operation. + if (MatchOperationAddr(I, I->getOpcode(), Depth)) { + // Okay, it's possible to fold this. Check to see if it is actually + // *profitable* to do so. We use a simple cost model to avoid increasing + // register pressure too much. + if (I->hasOneUse() || + IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + AddrModeInsts.push_back(I); + return true; + } + + // It isn't profitable to do this, roll back. + //cerr << "NOT FOLDING: " << *I; + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { + if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) + return true; + } else if (isa<ConstantPointerNull>(Addr)) { + // Null pointer gets folded without affecting the addressing mode. + return true; + } + + // Worse case, the target should support [reg] addressing modes. :) + if (!AddrMode.HasBaseReg) { + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = Addr; + // Still check for legality in case the target supports [imm] but not [i+r]. + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.HasBaseReg = false; + AddrMode.BaseReg = 0; + } + + // If the base register is already taken, see if we can do [r+r]. + if (AddrMode.Scale == 0) { + AddrMode.Scale = 1; + AddrMode.ScaledReg = Addr; + if (TLI.isLegalAddressingMode(AddrMode, AccessTy)) + return true; + AddrMode.Scale = 0; + AddrMode.ScaledReg = 0; + } + // Couldn't match. + return false; +} + +/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified +/// inline asm call are due to memory operands. If so, return true, otherwise +/// return false. +static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, + const TargetLowering &TLI) { + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // If this asm operand is our Value*, and if it isn't an indirect memory + // operand, we can't fold it! 
+ if (OpInfo.CallOperandVal == OpVal && + (OpInfo.ConstraintType != TargetLowering::C_Memory || + !OpInfo.isIndirect)) + return false; + } + + return true; +} + +/// FindAllMemoryUses - Recursively walk all the uses of I until we find a +/// memory use. If we find an obviously non-foldable instruction, return true. +/// Add the ultimately found memory instructions to MemoryUses. +static bool FindAllMemoryUses(Instruction *I, + SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses, + SmallPtrSet<Instruction*, 16> &ConsideredInsts, + const TargetLowering &TLI) { + // If we already considered this instruction, we're done. + if (!ConsideredInsts.insert(I)) + return false; + + // If this is an obviously unfoldable instruction, bail out. + if (!MightBeFoldableInst(I)) + return true; + + // Loop over all the uses, recursively processing them. + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + User *U = *UI; + + if (LoadInst *LI = dyn_cast<LoadInst>(U)) { + MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo())); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(U)) { + unsigned opNo = UI.getOperandNo(); + if (opNo == 0) return true; // Storing addr, not into addr. + MemoryUses.push_back(std::make_pair(SI, opNo)); + continue; + } + + if (CallInst *CI = dyn_cast<CallInst>(U)) { + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); + if (!IA) return true; + + // If this is a memory operand, we're cool, otherwise bail out. + if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) + return true; + continue; + } + + if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts, + TLI)) + return true; + } + + return false; +} + +/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at +/// the use site that we're folding it into. If so, there is no cost to +/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values +/// that we know are live at the instruction already. +bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, + Value *KnownLive2) { + // If Val is either of the known-live values, we know it is live! + if (Val == 0 || Val == KnownLive1 || Val == KnownLive2) + return true; + + // All values other than instructions and arguments (e.g. constants) are live. + if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; + + // If Val is a constant sized alloca in the entry block, it is live, this is + // true because it is just a reference to the stack/frame pointer, which is + // live for the whole function. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) + if (AI->isStaticAlloca()) + return true; + + // Check to see if this value is already used in the memory instruction's + // block. If so, it's already live into the block at the very least, so we + // can reasonably fold it. + return Val->isUsedInBasicBlock(MemoryInst->getParent()); +} + +/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing +/// mode of the machine to fold the specified instruction into a load or store +/// that ultimately uses it. However, the specified instruction has multiple +/// uses. Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: +/// +/// X = ... +/// Y = X+1 +/// use(Y) -> nonload/store +/// Z = Y+1 +/// load Z +/// +/// In this case, Y has multiple uses, and can be folded into the load of Z +/// (yielding load [X+2]). 
However, doing this will cause both "X" and "X+1" to +/// be live at the use(Y) line. If we don't fold Y into load Z, we use one +/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the +/// number of computations either. +/// +/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If +/// X was live across 'load Z' for other reasons, we actually *would* want to +/// fold the addressing mode in the Z case. This would make Y die earlier. +bool AddressingModeMatcher:: +IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter) { + if (IgnoreProfitability) return true; + + // AMBefore is the addressing mode before this instruction was folded into it, + // and AMAfter is the addressing mode after the instruction was folded. Get + // the set of registers referenced by AMAfter and subtract out those + // referenced by AMBefore: this is the set of values which folding in this + // address extends the lifetime of. + // + // Note that there are only two potential values being referenced here, + // BaseReg and ScaleReg (global addresses are always available, as are any + // folded immediates). + Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; + + // If the BaseReg or ScaledReg was referenced by the previous addrmode, their + // lifetime wasn't extended by adding this instruction. + if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + BaseReg = 0; + if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + ScaledReg = 0; + + // If folding this instruction (and it's subexprs) didn't extend any live + // ranges, we're ok with it. + if (BaseReg == 0 && ScaledReg == 0) + return true; + + // If all uses of this instruction are ultimately load/store/inlineasm's, + // check to see if their addressing modes will include this instruction. If + // so, we can fold it into all uses, so it doesn't matter if it has multiple + // uses. + SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; + SmallPtrSet<Instruction*, 16> ConsideredInsts; + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI)) + return false; // Has a non-memory, non-foldable use! + + // Now that we know that all uses of this instruction are part of a chain of + // computation involving only operations that could theoretically be folded + // into a memory use, loop over each of these uses and see if they could + // *actually* fold the instruction. + SmallVector<Instruction*, 32> MatchedAddrModeInsts; + for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { + Instruction *User = MemoryUses[i].first; + unsigned OpNo = MemoryUses[i].second; + + // Get the access type of this use. If the use isn't a pointer, we don't + // know what it accesses. + Value *Address = User->getOperand(OpNo); + if (!Address->getType()->isPointerTy()) + return false; + Type *AddressAccessTy = + cast<PointerType>(Address->getType())->getElementType(); + + // Do a match against the root of this address, ignoring profitability. This + // will tell us if the addressing mode for the memory operation will + // *actually* cover the shared instruction. + ExtAddrMode Result; + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy, + MemoryInst, Result); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.MatchAddr(Address, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + + // If the match didn't cover I, then it won't be shared by it. 
+ if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} + +} // end anonymous namespace + /// IsNonLocalValue - Return true if the specified values are defined in a /// different basic block than BB. static bool IsNonLocalValue(Value *V, BasicBlock *BB) { diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp index 27efde53cd..d5a96eceb9 100644 --- a/lib/Transforms/Scalar/ConstantProp.cpp +++ b/lib/Transforms/Scalar/ConstantProp.cpp @@ -22,9 +22,9 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constant.h" -#include "llvm/DataLayout.h" -#include "llvm/Instruction.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a2a25ba0..995782e1bc 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -16,11 +16,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -97,12 +99,29 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) { Value *Incoming = P->getIncomingValue(i); if (isa<Constant>(Incoming)) continue; - Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i), - P->getIncomingBlock(i), - BB); - if (!C) continue; + Value *V = LVI->getConstantOnEdge(Incoming, P->getIncomingBlock(i), BB); - P->setIncomingValue(i, C); + // Look if the incoming value is a select with a constant but LVI tells us + // that the incoming value can never be that constant. In that case replace + // the incoming value with the other value of the select. This often allows + // us to remove the select later. 
+ if (!V) { + SelectInst *SI = dyn_cast<SelectInst>(Incoming); + if (!SI) continue; + + Constant *C = dyn_cast<Constant>(SI->getFalseValue()); + if (!C) continue; + + if (LVI->getPredicateOnEdge(ICmpInst::ICMP_EQ, SI, C, + P->getIncomingBlock(i), BB) != + LazyValueInfo::False) + continue; + + DEBUG(dbgs() << "CVP: Threading PHI over " << *SI << '\n'); + V = SI->getTrueValue(); + } + + P->setIncomingValue(i, V); Changed = true; } diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index f260331c6d..e8a090af40 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -19,7 +19,7 @@ #define DEBUG_TYPE "dce" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Instruction.h" +#include "llvm/IR/Instruction.h" #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/Target/TargetLibraryInfo.h" diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 124892887c..57432c7d71 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -26,12 +26,12 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" @@ -376,10 +376,10 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any // other store to the same object. - const DataLayout &TD = *AA.getDataLayout(); + const DataLayout *TD = AA.getDataLayout(); - const Value *UO1 = GetUnderlyingObject(P1, &TD), - *UO2 = GetUnderlyingObject(P2, &TD); + const Value *UO1 = GetUnderlyingObject(P1, TD), + *UO2 = GetUnderlyingObject(P2, TD); // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. 
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 6b622c73f0..3c08634bfe 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -19,8 +19,8 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 1c540b240c..c04b447f1c 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -32,12 +32,12 @@ #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -849,8 +849,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr, return -1; int64_t StoreOffset = 0, LoadOffset = 0; - Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr, StoreOffset,TD); - Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, TD); + Value *StoreBase = GetPointerBaseWithConstantOffset(WritePtr,StoreOffset,&TD); + Value *LoadBase = GetPointerBaseWithConstantOffset(LoadPtr, LoadOffset, &TD); if (StoreBase != LoadBase) return -1; @@ -945,7 +945,7 @@ static int AnalyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, // then we should widen it! int64_t LoadOffs = 0; const Value *LoadBase = - GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, &TD); unsigned LoadSize = TD.getTypeStoreSize(LoadTy); unsigned Size = MemoryDependenceAnalysis:: @@ -1526,10 +1526,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { BasicBlock *LoadBB = LI->getParent(); BasicBlock *TmpBB = LoadBB; - bool isSinglePred = false; bool allSingleSucc = true; while (TmpBB->getSinglePredecessor()) { - isSinglePred = true; TmpBB = TmpBB->getSinglePredecessor(); if (TmpBB == LoadBB) // Infinite (unreachable) loop. return false; @@ -1548,28 +1546,6 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { assert(TmpBB); LoadBB = TmpBB; - // FIXME: It is extremely unclear what this loop is doing, other than - // artificially restricting loadpre. - if (isSinglePred) { - bool isHot = false; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - const AvailableValueInBlock &AV = ValuesPerBlock[i]; - if (AV.isSimpleValue()) - // "Hot" Instruction is in some loop (because it dominates its dep. - // instruction). - if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue())) - if (DT->dominates(LI, I)) { - isHot = true; - break; - } - } - - // We are interested only in "hot" instructions. We don't want to do any - // mis-optimizations here. - if (!isHot) - return false; - } - // Check to see how many predecessors have the loaded value fully // available. 
DenseMap<BasicBlock*, Value*> PredLoads; @@ -2371,8 +2347,8 @@ bool GVN::processBlock(BasicBlock *BB) { E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); - (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); + (*I)->eraseFromParent(); } InstrsToErase.clear(); @@ -2389,7 +2365,7 @@ bool GVN::processBlock(BasicBlock *BB) { /// control flow patterns and attempts to perform simple PRE at the join point. bool GVN::performPRE(Function &F) { bool Changed = false; - DenseMap<BasicBlock*, Value*> predMap; + SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap; for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()), DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) { BasicBlock *CurrentBlock = *DI; @@ -2445,19 +2421,22 @@ bool GVN::performPRE(Function &F) { if (P == CurrentBlock) { NumWithout = 2; break; - } else if (!DT->dominates(&F.getEntryBlock(), P)) { + } else if (!DT->isReachableFromEntry(P)) { NumWithout = 2; break; } Value* predV = findLeader(P, ValNo); if (predV == 0) { + predMap.push_back(std::make_pair(static_cast<Value *>(0), P)); PREPred = P; ++NumWithout; } else if (predV == CurInst) { + /* CurInst dominates this predecessor. */ NumWithout = 2; + break; } else { - predMap[P] = predV; + predMap.push_back(std::make_pair(predV, P)); ++NumWith; } } @@ -2504,15 +2483,14 @@ bool GVN::performPRE(Function &F) { // the PRE predecessor. This is typically because of loads which // are not value numbered precisely. if (!success) { - delete PREInstr; DEBUG(verifyRemoved(PREInstr)); + delete PREInstr; continue; } PREInstr->insertBefore(PREPred->getTerminator()); PREInstr->setName(CurInst->getName() + ".pre"); PREInstr->setDebugLoc(CurInst->getDebugLoc()); - predMap[PREPred] = PREInstr; VN.add(PREInstr, ValNo); ++NumGVNPRE; @@ -2520,13 +2498,14 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. 
- pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); - PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), + PHINode* Phi = PHINode::Create(CurInst->getType(), predMap.size(), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = PB; PI != PE; ++PI) { - BasicBlock *P = *PI; - Phi->addIncoming(predMap[P], P); + for (unsigned i = 0, e = predMap.size(); i != e; ++i) { + if (Value *V = predMap[i].first) + Phi->addIncoming(V, predMap[i].second); + else + Phi->addIncoming(PREInstr, PREPred); } VN.add(Phi, ValNo); @@ -2551,8 +2530,8 @@ bool GVN::performPRE(Function &F) { DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n'); if (MD) MD->removeInstruction(CurInst); - CurInst->eraseFromParent(); DEBUG(verifyRemoved(CurInst)); + CurInst->eraseFromParent(); Changed = true; } } diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp index 486a349c55..1601a8d646 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/Transforms/Scalar/GlobalMerge.cpp @@ -54,15 +54,15 @@ #define DEBUG_TYPE "global-merge" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -76,7 +76,7 @@ namespace { const TargetLowering *TLI; bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, - Module &M, bool isConst) const; + Module &M, bool isConst, unsigned AddrSpace) const; public: static char ID; // Pass identification, replacement for typeid. 
@@ -118,7 +118,7 @@ INITIALIZE_PASS(GlobalMerge, "global-merge", bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, - Module &M, bool isConst) const { + Module &M, bool isConst, unsigned AddrSpace) const { const DataLayout *TD = TLI->getDataLayout(); // FIXME: Infer the maximum possible offset depending on the actual users @@ -150,7 +150,9 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals"); + MergedInit, "_MergedGlobals", + 0, GlobalVariable::NotThreadLocal, + AddrSpace); for (size_t k = i; k < j; ++k) { Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), @@ -169,7 +171,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, bool GlobalMerge::doInitialization(Module &M) { - SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals; + DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, + BSSGlobals; const DataLayout *TD = TLI->getDataLayout(); unsigned MaxOffset = TLI->getMaximalGlobalOffset(); bool Changed = false; @@ -181,6 +184,11 @@ bool GlobalMerge::doInitialization(Module &M) { if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) continue; + PointerType *PT = dyn_cast<PointerType>(I->getType()); + assert(PT && "Global variable is not a pointer!"); + + unsigned AddressSpace = PT->getAddressSpace(); + // Ignore fancy-aligned globals for now. unsigned Alignment = TD->getPreferredAlignment(I); Type *Ty = I->getType()->getElementType(); @@ -195,18 +203,23 @@ bool GlobalMerge::doInitialization(Module &M) { if (TD->getTypeAllocSize(Ty) < MaxOffset) { if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) .isBSSLocal()) - BSSGlobals.push_back(I); + BSSGlobals[AddressSpace].push_back(I); else if (I->isConstant()) - ConstGlobals.push_back(I); + ConstGlobals[AddressSpace].push_back(I); else - Globals.push_back(I); + Globals[AddressSpace].push_back(I); } } - if (Globals.size() > 1) - Changed |= doMerge(Globals, M, false); - if (BSSGlobals.size() > 1) - Changed |= doMerge(BSSGlobals, M, false); + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = Globals.begin(), E = Globals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); + + for (DenseMap<unsigned, SmallVector<GlobalVariable*, 16> >::iterator + I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) + if (I->second.size() > 1) + Changed |= doMerge(I->second, M, false, I->first); // FIXME: This currently breaks the EH processing due to way how the // typeinfo detection works. 
We might want to detect the TIs and ignore diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 29f5a10e09..97fff7e782 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -33,12 +33,13 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" -#include "llvm/BasicBlock.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -47,7 +48,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" -#include "llvm/Type.h" using namespace llvm; STATISTIC(NumWidened , "Number of indvars widened"); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 4a4cd705e2..b61c5ba56e 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -23,9 +23,9 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" -#include "llvm/DataLayout.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -249,7 +249,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (!isa<IntrinsicInst>(CI)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + // Blocks with NoDuplicate are modelled as having infinite cost, so they + // are never duplicated. + return ~0U; + else if (!isa<IntrinsicInst>(CI)) Size += 3; else if (!CI->getType()->isVectorTy()) Size += 1; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 7ef1d34d3f..f94cd2a073 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -40,12 +40,13 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -439,13 +440,12 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { } // Only these instructions are hoistable/sinkable. 
- bool HoistableKind = (isa<BinaryOperator>(I) || isa<CastInst>(I) || - isa<SelectInst>(I) || isa<GetElementPtrInst>(I) || - isa<CmpInst>(I) || isa<InsertElementInst>(I) || - isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I)); - if (!HoistableKind) - return false; + if (!isa<BinaryOperator>(I) && !isa<CastInst>(I) && !isa<SelectInst>(I) && + !isa<GetElementPtrInst>(I) && !isa<CmpInst>(I) && + !isa<InsertElementInst>(I) && !isa<ExtractElementInst>(I) && + !isa<ShuffleVectorInst>(I) && !isa<ExtractValueInst>(I) && + !isa<InsertValueInst>(I)) + return false; return isSafeToExecuteUnconditionally(I); } @@ -665,16 +665,18 @@ namespace { AliasSetTracker &AST; DebugLoc DL; int Alignment; + MDNode *TBAATag; public: LoopPromoter(Value *SP, const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, SmallPtrSet<Value*, 4> &PMA, SmallVectorImpl<BasicBlock*> &LEB, SmallVectorImpl<Instruction*> &LIP, - AliasSetTracker &ast, DebugLoc dl, int alignment) + AliasSetTracker &ast, DebugLoc dl, int alignment, + MDNode *TBAATag) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), - AST(ast), DL(dl), Alignment(alignment) {} + AST(ast), DL(dl), Alignment(alignment), TBAATag(TBAATag) {} virtual bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction*> &) const { @@ -698,6 +700,7 @@ namespace { StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos); NewSI->setAlignment(Alignment); NewSI->setDebugLoc(DL); + if (TBAATag) NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag); } } @@ -751,10 +754,11 @@ void LICM::PromoteAliasSet(AliasSet &AS, // We start with an alignment of one and try to find instructions that allow // us to prove better alignment. unsigned Alignment = 1; + MDNode *TBAATag = 0; // Check that all of the pointers in the alias set have the same type. We // cannot (yet) promote a memory location that is loaded and stored in - // different sizes. + // different sizes. While we are at it, collect alignment and TBAA info. for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) { Value *ASIV = ASI->getValue(); PointerMustAliases.insert(ASIV); @@ -796,8 +800,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, // instruction will be executed, update the alignment. // Larger is better, with the exception of 0 being the best alignment. unsigned InstAlignment = store->getAlignment(); - if ((InstAlignment > Alignment || InstAlignment == 0) - && (Alignment != 0)) + if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0) if (isGuaranteedToExecute(*Use)) { GuaranteedToExecute = true; Alignment = InstAlignment; @@ -809,6 +812,15 @@ void LICM::PromoteAliasSet(AliasSet &AS, } else return; // Not a load or store. + // Merge the TBAA tags. + if (LoopUses.empty()) { + // On the first load/store, just take its TBAA tag. + TBAATag = Use->getMetadata(LLVMContext::MD_tbaa); + } else if (TBAATag) { + TBAATag = MDNode::getMostGenericTBAA(TBAATag, + Use->getMetadata(LLVMContext::MD_tbaa)); + } + LoopUses.push_back(Use); } } @@ -841,7 +853,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, SmallVector<PHINode*, 16> NewPHIs; SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, - InsertPts, *CurAST, DL, Alignment); + InsertPts, *CurAST, DL, Alignment, TBAATag); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. 
@@ -850,6 +862,7 @@ void LICM::PromoteAliasSet(AliasSet &AS, Preheader->getTerminator()); PreheaderLoad->setAlignment(Alignment); PreheaderLoad->setDebugLoc(DL); + if (TBAATag) PreheaderLoad->setMetadata(LLVMContext::MD_tbaa, TBAATag); SSA.AddAvailableValue(Preheader, PreheaderLoad); // Rewrite all the loads in the loop and remember all the definitions from diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 7807e9bb4f..8258719a02 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -48,15 +48,15 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -135,12 +135,12 @@ namespace { DominatorTree *DT; ScalarEvolution *SE; TargetLibraryInfo *TLI; - const ScalarTargetTransformInfo *STTI; + const TargetTransformInfo *TTI; public: static char ID; explicit LoopIdiomRecognize() : LoopPass(ID) { initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - TD = 0; DT = 0; SE = 0; TLI = 0; STTI = 0; + TD = 0; DT = 0; SE = 0; TLI = 0; TTI = 0; } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -177,6 +177,7 @@ namespace { AU.addPreserved<DominatorTree>(); AU.addRequired<DominatorTree>(); AU.addRequired<TargetLibraryInfo>(); + AU.addRequired<TargetTransformInfo>(); } const DataLayout *getDataLayout() { @@ -195,12 +196,8 @@ namespace { return TLI ? TLI : (TLI = &getAnalysis<TargetLibraryInfo>()); } - const ScalarTargetTransformInfo *getScalarTargetTransformInfo() { - if (!STTI) { - TargetTransformInfo *TTI = getAnalysisIfAvailable<TargetTransformInfo>(); - if (TTI) STTI = TTI->getScalarTargetTransformInfo(); - } - return STTI; + const TargetTransformInfo *getTargetTransformInfo() { + return TTI ? TTI : (TTI = &getAnalysis<TargetTransformInfo>()); } Loop *getLoop() const { return CurLoop; } @@ -221,6 +218,7 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -312,8 +310,8 @@ NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): } bool NclPopcountRecognize::preliminaryScreen() { - const ScalarTargetTransformInfo *STTI = LIR.getScalarTargetTransformInfo(); - if (STTI->getPopcntHwSupport(32) != ScalarTargetTransformInfo::Fast) + const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); + if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) return false; // Counting population are usually conducted by few arithmetic instrutions. 
@@ -409,7 +407,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" { - if (DefX2->getOpcode() != Instruction::And) + if (!DefX2 || DefX2->getOpcode() != Instruction::And) return false; BinaryOperator *SubOneOp; @@ -631,7 +629,7 @@ CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, /// call, and return true; otherwise, return false. bool NclPopcountRecognize::recognize() { - if (!LIR.getScalarTargetTransformInfo()) + if (!LIR.getTargetTransformInfo()) return false; LIR.getScalarEvolution(); @@ -669,12 +667,14 @@ bool LoopIdiomRecognize::runOnCountableLoop() { if (!getDataLayout()) return false; - getDominatorTree(); + // set DT + (void)getDominatorTree(); LoopInfo &LI = getAnalysis<LoopInfo>(); TLI = &getAnalysis<TargetLibraryInfo>(); - getTargetLibraryInfo(); + // set TLI + (void)getTargetLibraryInfo(); SmallVector<BasicBlock*, 8> ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 10ba22434a..a23860aad8 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -14,12 +14,13 @@ #define DEBUG_TYPE "loop-instsimplify" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 249baf5164..e98ae953e5 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -18,9 +18,10 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -51,6 +52,7 @@ namespace { AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); } bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -59,11 +61,13 @@ namespace { private: LoopInfo *LI; + const TargetTransformInfo *TTI; }; } char LoopRotate::ID = 0; INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -75,6 +79,7 @@ Pass *llvm::createLoopRotatePass() { return new LoopRotate(); } /// the loop is rotated at least once. bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis<LoopInfo>(); + TTI = &getAnalysis<TargetTransformInfo>(); // Simplify the loop latch before attempting to rotate the header // upward. 
Rotation may not be needed if the loop tail can be folded into the @@ -274,10 +279,16 @@ bool LoopRotate::rotateLoop(Loop *L) { if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; - // Check size of original header and reject loop if it is very big. + // Check size of original header and reject loop if it is very big or we can't + // duplicate blocks inside it. { CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader); + Metrics.analyzeBasicBlock(OrigHeader, *TTI); + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" + << " instructions: "; L->dump()); + return false; + } if (Metrics.NumInsts > MAX_HEADER_SIZE) return false; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index d571ba3fe0..4e4cb86464 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -37,8 +37,8 @@ // // TODO: Handle multiple loops at a time. // -// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr -// instead of a GlobalValue? +// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead +// of a GlobalValue? // // TODO: When truncation is free, truncate ICmp users' operands to make it a // smaller encoding (on x86 at least). @@ -58,21 +58,21 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/AddressingMode.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include <algorithm> @@ -224,16 +224,24 @@ namespace { /// computing satisfying a use. It may include broken-out immediates and scaled /// registers. struct Formula { - /// AM - This is used to represent complex addressing, as well as other kinds - /// of interesting uses. - AddrMode AM; + /// Global base address used for complex addressing. + GlobalValue *BaseGV; + + /// Base offset for complex addressing. + int64_t BaseOffset; + + /// Whether any complex addressing has a base register. + bool HasBaseReg; + + /// The scale of any complex addressing. + int64_t Scale; /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty, AM.HasBaseReg should be set to true. - SmallVector<const SCEV *, 2> BaseRegs; + /// non-empty, + SmallVector<const SCEV *, 4> BaseRegs; /// ScaledReg - The 'scaled' register for this use. This should be non-null - /// when AM.Scale is not zero. + /// when Scale is not zero. const SCEV *ScaledReg; /// UnfoldedOffset - An additional constant offset which added near the @@ -241,7 +249,9 @@ struct Formula { /// live in an add immediate field rather than a register. 
int64_t UnfoldedOffset; - Formula() : ScaledReg(0), UnfoldedOffset(0) {} + Formula() + : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0), + UnfoldedOffset(0) {} void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); @@ -327,13 +337,13 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { const SCEV *Sum = SE.getAddExpr(Good); if (!Sum->isZero()) BaseRegs.push_back(Sum); - AM.HasBaseReg = true; + HasBaseReg = true; } if (!Bad.empty()) { const SCEV *Sum = SE.getAddExpr(Bad); if (!Sum->isZero()) BaseRegs.push_back(Sum); - AM.HasBaseReg = true; + HasBaseReg = true; } } @@ -349,7 +359,7 @@ unsigned Formula::getNumRegs() const { Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : - AM.BaseGV ? AM.BaseGV->getType() : + BaseGV ? BaseGV->getType() : 0; } @@ -382,29 +392,29 @@ bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, void Formula::print(raw_ostream &OS) const { bool First = true; - if (AM.BaseGV) { + if (BaseGV) { if (!First) OS << " + "; else First = false; - WriteAsOperand(OS, AM.BaseGV, /*PrintType=*/false); + WriteAsOperand(OS, BaseGV, /*PrintType=*/false); } - if (AM.BaseOffs != 0) { + if (BaseOffset != 0) { if (!First) OS << " + "; else First = false; - OS << AM.BaseOffs; + OS << BaseOffset; } for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(), E = BaseRegs.end(); I != E; ++I) { if (!First) OS << " + "; else First = false; OS << "reg(" << **I << ')'; } - if (AM.HasBaseReg && BaseRegs.empty()) { + if (HasBaseReg && BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: HasBaseReg**"; - } else if (!AM.HasBaseReg && !BaseRegs.empty()) { + } else if (!HasBaseReg && !BaseRegs.empty()) { if (!First) OS << " + "; else First = false; OS << "**error: !HasBaseReg**"; } - if (AM.Scale != 0) { + if (Scale != 0) { if (!First) OS << " + "; else First = false; - OS << AM.Scale << "*reg("; + OS << Scale << "*reg("; if (ScaledReg) OS << *ScaledReg; else @@ -927,8 +937,8 @@ void Cost::RateFormula(const Formula &F, // Tally up the non-zero immediates. for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), E = Offsets.end(); I != E; ++I) { - int64_t Offset = (uint64_t)*I + F.AM.BaseOffs; - if (F.AM.BaseGV) + int64_t Offset = (uint64_t)*I + F.BaseOffset; + if (F.BaseGV) ImmCost += 64; // Handle symbolic values conservatively. // TODO: This should probably be the pointer size. else if (Offset != 0) @@ -1078,19 +1088,19 @@ namespace { /// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding /// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. 
struct UniquifierDenseMapInfo { - static SmallVector<const SCEV *, 2> getEmptyKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getEmptyKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-1)); return V; } - static SmallVector<const SCEV *, 2> getTombstoneKey() { - SmallVector<const SCEV *, 2> V; + static SmallVector<const SCEV *, 4> getTombstoneKey() { + SmallVector<const SCEV *, 4> V; V.push_back(reinterpret_cast<const SCEV *>(-2)); return V; } - static unsigned getHashValue(const SmallVector<const SCEV *, 2> &V) { + static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { unsigned Result = 0; for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), E = V.end(); I != E; ++I) @@ -1098,8 +1108,8 @@ struct UniquifierDenseMapInfo { return Result; } - static bool isEqual(const SmallVector<const SCEV *, 2> &LHS, - const SmallVector<const SCEV *, 2> &RHS) { + static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, + const SmallVector<const SCEV *, 4> &RHS) { return LHS == RHS; } }; @@ -1110,7 +1120,7 @@ struct UniquifierDenseMapInfo { /// the user itself, and information about how the use may be satisfied. /// TODO: Represent multiple users of the same expression in common? class LSRUse { - DenseSet<SmallVector<const SCEV *, 2>, UniquifierDenseMapInfo> Uniquifier; + DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; public: /// KindType - An enum for a kind of use, indicating what types of @@ -1169,7 +1179,7 @@ public: /// HasFormula - Test whether this use as a formula which has the same /// registers as the given formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1179,7 +1189,7 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { /// InsertFormula - If the given formula has not yet been inserted, add it to /// the list, and return true. Return false otherwise. bool LSRUse::InsertFormula(const Formula &F) { - SmallVector<const SCEV *, 2> Key = F.BaseRegs; + SmallVector<const SCEV *, 4> Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); // Unstable sort by host order ok, because this is only used for uniquifying. std::sort(Key.begin(), Key.end()); @@ -1270,46 +1280,42 @@ void LSRUse::dump() const { /// isLegalUse - Test whether the use described by AM is "legal", meaning it can /// be completely folded into the user instruction at isel time. This includes /// address-mode folding and special icmp tricks. -static bool isLegalUse(const AddrMode &AM, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { +static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, + Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: - // If we have low-level target information, ask the target if it can - // completely fold this address. - if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy); + return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); // Otherwise, just guess that reg+reg addressing is legal. 
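The hunk above is representative of the interface change running through this file: the TargetLowering AddrMode bundle is gone, and isLegalUse() now hands the addressing components (BaseGV, BaseOffset, HasBaseReg, Scale) individually to TargetTransformInfo. A minimal sketch of that kind of query, outside the patch and under a hypothetical helper name, assuming only the TTI hook visible in the hunk:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"

// Sketch only -- not part of the patch. canFoldAddress is a hypothetical
// helper that forwards the decomposed components to the TTI hook used above.
static bool canFoldAddress(const llvm::TargetTransformInfo &TTI,
                           llvm::Type *AccessTy, llvm::GlobalValue *BaseGV,
                           int64_t BaseOffset) {
  // Conservatively assume a base register and a unit scale.
  return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset,
                                   /*HasBaseReg=*/true, /*Scale=*/1);
}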
- return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1; + //return ; case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to // fold a GV into an ICmp. - if (AM.BaseGV) + if (BaseGV) return false; // ICmp only has two operands; don't allow more than two non-trivial parts. - if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0) + if (Scale != 0 && HasBaseReg && BaseOffset != 0) return false; // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by // putting the scaled register in the other operand of the icmp. - if (AM.Scale != 0 && AM.Scale != -1) + if (Scale != 0 && Scale != -1) return false; // If we have low-level target information, ask the target if it can fold an // integer immediate on an icmp. - if (AM.BaseOffs != 0) { - if (!TLI) - return false; + if (BaseOffset != 0) { // We have one of: - // ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset - // ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset + // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset + // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset // Offs is the ICmp immediate. - int64_t Offs = AM.BaseOffs; - if (AM.Scale == 0) - Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN. - return TLI->isLegalICmpImmediate(Offs); + if (Scale == 0) + // The cast does the right thing with INT64_MIN. + BaseOffset = -(uint64_t)BaseOffset; + return TTI.isLegalICmpImmediate(BaseOffset); } // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg @@ -1317,92 +1323,87 @@ static bool isLegalUse(const AddrMode &AM, case LSRUse::Basic: // Only handle single-register values. - return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0; + return !BaseGV && Scale == 0 && BaseOffset == 0; case LSRUse::Special: // Special case Basic to handle -1 scales. - return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0; + return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; } llvm_unreachable("Invalid LSRUse Kind!"); } -static bool isLegalUse(AddrMode AM, - int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) { // Check for overflow. - if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) != + if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != (MinOffset > 0)) return false; - AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset; - if (isLegalUse(AM, Kind, AccessTy, TLI)) { - AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset; - // Check for overflow. 
- if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) != - (MaxOffset > 0)) - return false; - AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset; - return isLegalUse(AM, Kind, AccessTy, TLI); - } - return false; + MinOffset = (uint64_t)BaseOffset + MinOffset; + if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) != + (MaxOffset > 0)) + return false; + MaxOffset = (uint64_t)BaseOffset + MaxOffset; + + return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, + Scale) && + isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); } -static bool isAlwaysFoldable(int64_t BaseOffs, - GlobalValue *BaseGV, - bool HasBaseReg, +static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, + const Formula &F) { + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, + F.BaseOffset, F.HasBaseReg, F.Scale); +} + +static bool isAlwaysFoldable(const TargetTransformInfo &TTI, LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI) { + GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg) { // Fast-path: zero is always foldable. - if (BaseOffs == 0 && !BaseGV) return true; + if (BaseOffset == 0 && !BaseGV) return true; // Conservatively, create an address with an immediate and a // base and a scale. - AddrMode AM; - AM.BaseOffs = BaseOffs; - AM.BaseGV = BaseGV; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; // Canonicalize a scale of 1 to a base register if the formula doesn't // already have a base register. - if (!AM.HasBaseReg && AM.Scale == 1) { - AM.Scale = 0; - AM.HasBaseReg = true; + if (!HasBaseReg && Scale == 1) { + Scale = 0; + HasBaseReg = true; } - return isLegalUse(AM, Kind, AccessTy, TLI); + return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); } -static bool isAlwaysFoldable(const SCEV *S, - int64_t MinOffset, int64_t MaxOffset, - bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy, - const TargetLowering *TLI, - ScalarEvolution &SE) { +static bool isAlwaysFoldable(const TargetTransformInfo &TTI, + ScalarEvolution &SE, int64_t MinOffset, + int64_t MaxOffset, LSRUse::KindType Kind, + Type *AccessTy, const SCEV *S, bool HasBaseReg) { // Fast-path: zero is always foldable. if (S->isZero()) return true; // Conservatively, create an address with an immediate and a // base and a scale. - int64_t BaseOffs = ExtractImmediate(S, SE); + int64_t BaseOffset = ExtractImmediate(S, SE); GlobalValue *BaseGV = ExtractSymbol(S, SE); // If there's anything else involved, it's not foldable. if (!S->isZero()) return false; // Fast-path: zero is always foldable. - if (BaseOffs == 0 && !BaseGV) return true; + if (BaseOffset == 0 && !BaseGV) return true; // Conservatively, create an address with an immediate and a // base and a scale. - AddrMode AM; - AM.BaseOffs = BaseOffs; - AM.BaseGV = BaseGV; - AM.HasBaseReg = HasBaseReg; - AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1; + int64_t Scale = Kind == LSRUse::ICmpZero ? 
-1 : 1; - return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI); + return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, + BaseOffset, HasBaseReg, Scale); } namespace { @@ -1502,7 +1503,7 @@ class LSRInstance { ScalarEvolution &SE; DominatorTree &DT; LoopInfo &LI; - const TargetLowering *const TLI; + const TargetTransformInfo &TTI; Loop *const L; bool Changed; @@ -1638,7 +1639,7 @@ class LSRInstance { Pass *P); public: - LSRInstance(const TargetLowering *tli, Loop *l, Pass *P); + LSRInstance(Loop *L, Pass *P); bool getChanged() const { return Changed; } @@ -1688,12 +1689,9 @@ void LSRInstance::OptimizeShadowIV() { } if (!DestTy) continue; - if (TLI) { - // If target does not support DestTy natively then do not apply - // this transformation. - EVT DVT = TLI->getValueType(DestTy); - if (!TLI->isTypeLegal(DVT)) continue; - } + // If target does not support DestTy natively then do not apply + // this transformation. + if (!TTI.isTypeLegal(DestTy)) continue; PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); if (!PH) continue; @@ -2015,18 +2013,17 @@ LSRInstance::OptimizeLoopTermCond() { if (C->getValue().getMinSignedBits() >= 64 || C->getValue().isMinSignedValue()) goto decline_post_inc; - // Without TLI, assume that any stride might be valid, and so any - // use might be shared. - if (!TLI) - goto decline_post_inc; // Check for possible scaled-address reuse. Type *AccessTy = getAccessType(UI->getUser()); - AddrMode AM; - AM.Scale = C->getSExtValue(); - if (TLI->isLegalAddressingMode(AM, AccessTy)) + int64_t Scale = C->getSExtValue(); + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + /*BaseOffset=*/ 0, + /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; - AM.Scale = -AM.Scale; - if (TLI->isLegalAddressingMode(AM, AccessTy)) + Scale = -Scale; + if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, + /*BaseOffset=*/ 0, + /*HasBaseReg=*/ false, Scale)) goto decline_post_inc; } } @@ -2096,13 +2093,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, return false; // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { - if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + LU.MaxOffset - NewOffset, HasBaseReg)) return false; NewMinOffset = NewOffset; } else if (NewOffset > LU.MaxOffset) { - if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg, - Kind, AccessTy, TLI)) + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + NewOffset - LU.MinOffset, HasBaseReg)) return false; NewMaxOffset = NewOffset; } @@ -2131,7 +2128,8 @@ LSRInstance::getUse(const SCEV *&Expr, int64_t Offset = ExtractImmediate(Expr, SE); // Basic uses can't accept any offset, for example. - if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) { + if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, + Offset, /*HasBaseReg=*/ true)) { Expr = Copy; Offset = 0; } @@ -2199,10 +2197,10 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, // as OrigF. if (F.BaseRegs == OrigF.BaseRegs && F.ScaledReg == OrigF.ScaledReg && - F.AM.BaseGV == OrigF.AM.BaseGV && - F.AM.Scale == OrigF.AM.Scale && + F.BaseGV == OrigF.BaseGV && + F.Scale == OrigF.Scale && F.UnfoldedOffset == OrigF.UnfoldedOffset) { - if (F.AM.BaseOffs == 0) + if (F.BaseOffset == 0) return &LU; // This is the formula where all the registers and symbols matched; // there aren't going to be any others. 
Since we declined it, we @@ -2396,7 +2394,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr, /// TODO: Consider IVInc free if it's already used in another chains. static bool isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, - ScalarEvolution &SE, const TargetLowering *TLI) { + ScalarEvolution &SE, const TargetTransformInfo &TTI) { if (StressIVChain) return true; @@ -2539,6 +2537,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, // Add this IV user to the end of the chain. IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); } + IVChain &Chain = IVChainVec[ChainIdx]; SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers; // This chain's NearUsers become FarUsers. @@ -2556,8 +2555,19 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, for (Value::use_iterator UseIter = IVOper->use_begin(), UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { Instruction *OtherUse = dyn_cast<Instruction>(*UseIter); - if (!OtherUse || OtherUse == UserInst) + if (!OtherUse) + continue; + // Uses in the chain will no longer be uses if the chain is formed. + // Include the head of the chain in this iteration (not Chain.begin()). + IVChain::const_iterator IncIter = Chain.Incs.begin(); + IVChain::const_iterator IncEnd = Chain.Incs.end(); + for( ; IncIter != IncEnd; ++IncIter) { + if (IncIter->UserInst == OtherUse) + break; + } + if (IncIter != IncEnd) continue; + if (SE.isSCEVable(OtherUse->getType()) && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) && IU.isIVUserOrOperand(OtherUse)) { @@ -2654,7 +2664,7 @@ void LSRInstance::CollectChains() { for (unsigned UsersIdx = 0, NChains = IVChainVec.size(); UsersIdx < NChains; ++UsersIdx) { if (!isProfitableChain(IVChainVec[UsersIdx], - ChainUsersVec[UsersIdx].FarUsers, SE, TLI)) + ChainUsersVec[UsersIdx].FarUsers, SE, TTI)) continue; // Preserve the chain at UsesIdx. if (ChainIdx != UsersIdx) @@ -2681,7 +2691,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) { /// Return true if the IVInc can be folded into an addressing mode. static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, - Value *Operand, const TargetLowering *TLI) { + Value *Operand, const TargetTransformInfo &TTI) { const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr); if (!IncConst || !isAddressUse(UserInst, Operand)) return false; @@ -2690,8 +2700,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, return false; int64_t IncOffset = IncConst->getValue()->getSExtValue(); - if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HaseBaseReg=*/false, - LSRUse::Address, getAccessType(UserInst), TLI)) + if (!isAlwaysFoldable(TTI, LSRUse::Address, + getAccessType(UserInst), /*BaseGV=*/ 0, + IncOffset, /*HaseBaseReg=*/ false)) return false; return true; @@ -2762,7 +2773,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, // If an IV increment can't be folded, use it as the next IV value. 
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand, - TLI)) { + TTI)) { assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); IVSrc = IVOper; LeftOverExpr = 0; @@ -2904,7 +2915,7 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { Formula F; F.BaseRegs.push_back(S); - F.AM.HasBaseReg = true; + F.HasBaseReg = true; bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } @@ -3106,9 +3117,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Don't pull a constant into a register if the constant could be folded // into an immediate field. - if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset, - Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, *J, Base.getNumRegs() > 1)) continue; // Collect all operands except *J. @@ -3120,9 +3130,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. if (InnerAddOps.size() == 1 && - isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset, - Base.getNumRegs() > 1, - LU.Kind, LU.AccessTy, TLI, SE)) + isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind, + LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1)) continue; const SCEV *InnerSum = SE.getAddExpr(InnerAddOps); @@ -3132,10 +3141,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add the remaining pieces of the add back into the new formula. const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum); - if (TLI && InnerSumSC && + if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - InnerSumSC->getValue()->getZExtValue())) { + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + InnerSumSC->getValue()->getZExtValue())) { F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue(); F.BaseRegs.erase(F.BaseRegs.begin() + i); @@ -3144,9 +3153,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, // Add J as its own register, or an unfolded immediate. const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J); - if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && - TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset + - SC->getValue()->getZExtValue())) + if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 && + TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset + + SC->getValue()->getZExtValue())) F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue(); else @@ -3195,7 +3204,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. 
- if (Base.AM.BaseGV) return; + if (Base.BaseGV) return; for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *G = Base.BaseRegs[i]; @@ -3203,9 +3212,8 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, if (G->isZero() || !GV) continue; Formula F = Base; - F.AM.BaseGV = GV; - if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + F.BaseGV = GV; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3228,9 +3236,9 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(), E = Worklist.end(); I != E; ++I) { Formula F = Base; - F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I; - if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I, - LU.Kind, LU.AccessTy, TLI)) { + F.BaseOffset = (uint64_t)Base.BaseOffset - *I; + if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind, + LU.AccessTy, F)) { // Add the offset to the base register. const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G); // If it cancelled out, drop the base register, otherwise update it. @@ -3248,9 +3256,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, if (G->isZero() || Imm == 0) continue; Formula F = Base; - F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm; - if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + F.BaseOffset = (uint64_t)F.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) continue; F.BaseRegs[i] = G; (void)InsertFormula(LU, LUIdx, F); @@ -3271,7 +3278,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, // Don't do this if there is more than one offset. if (LU.MinOffset != LU.MaxOffset) return; - assert(!Base.AM.BaseGV && "ICmpZero use is not legal!"); + assert(!Base.BaseGV && "ICmpZero use is not legal!"); // Check each interesting stride. for (SmallSetVector<int64_t, 8>::const_iterator @@ -3279,10 +3286,10 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, int64_t Factor = *I; // Check that the multiplication doesn't overflow. - if (Base.AM.BaseOffs == INT64_MIN && Factor == -1) + if (Base.BaseOffset == INT64_MIN && Factor == -1) continue; - int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; - if (NewBaseOffs / Factor != Base.AM.BaseOffs) + int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor; + if (NewBaseOffset / Factor != Base.BaseOffset) continue; // Check that multiplying with the use offset doesn't overflow. @@ -3294,14 +3301,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, continue; Formula F = Base; - F.AM.BaseOffs = NewBaseOffs; + F.BaseOffset = NewBaseOffset; // Check that this scale is legal. - if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) + if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F)) continue; // Compensate for the use having MinOffset built into it. - F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Offset - LU.MinOffset; + F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset; const SCEV *FactorS = SE.getConstant(IntTy, Factor); @@ -3342,23 +3349,23 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (!IntTy) return; // If this Formula already has a scaled register, we can't add another one. - if (Base.AM.Scale != 0) return; + if (Base.Scale != 0) return; // Check each interesting stride. 
for (SmallSetVector<int64_t, 8>::const_iterator I = Factors.begin(), E = Factors.end(); I != E; ++I) { int64_t Factor = *I; - Base.AM.Scale = Factor; - Base.AM.HasBaseReg = Base.BaseRegs.size() > 1; + Base.Scale = Factor; + Base.HasBaseReg = Base.BaseRegs.size() > 1; // Check whether this scale is going to be legal. - if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + Base)) { // As a special-case, handle special out-of-loop Basic users specially. // TODO: Reconsider this special case. if (LU.Kind == LSRUse::Basic && - isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset, - LSRUse::Special, LU.AccessTy, TLI) && + isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special, + LU.AccessTy, Base) && LU.AllFixupsOutsideLoop) LU.Kind = LSRUse::Special; else @@ -3367,7 +3374,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // For an ICmpZero, negating a solitary base register won't lead to // new solutions. if (LU.Kind == LSRUse::ICmpZero && - !Base.AM.HasBaseReg && Base.AM.BaseOffs == 0 && !Base.AM.BaseGV) + !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV) continue; // For each addrec base reg, apply the scale, if possible. for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) @@ -3391,11 +3398,8 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { /// GenerateTruncates - Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { - // This requires TargetLowering to tell us which truncates are free. - if (!TLI) return; - // Don't bother truncating symbolic values. - if (Base.AM.BaseGV) return; + if (Base.BaseGV) return; // Determine the integer type for the base formula. Type *DstTy = Base.getType(); @@ -3405,7 +3409,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { for (SmallSetVector<Type *, 4>::const_iterator I = Types.begin(), E = Types.end(); I != E; ++I) { Type *SrcTy = *I; - if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) { + if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { Formula F = Base; if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I); @@ -3552,16 +3556,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { const Formula &F = LU.Formulae[L]; // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { - int64_t Offs = (uint64_t)F.AM.BaseOffs + - Imm * (uint64_t)F.AM.Scale; + int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; // Don't create 50 + reg(-50). if (F.referencesReg(SE.getSCEV( - ConstantInt::get(IntTy, -(uint64_t)Offs)))) + ConstantInt::get(IntTy, -(uint64_t)Offset)))) continue; Formula NewF = F; - NewF.AM.BaseOffs = Offs; - if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) + NewF.BaseOffset = Offset; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + NewF)) continue; NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); @@ -3570,9 +3573,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // immediate itself, then the formula isn't worthwhile. 
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) if (C->getValue()->isNegative() != - (NewF.AM.BaseOffs < 0) && - (C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale)) - .ule(abs64(NewF.AM.BaseOffs))) + (NewF.BaseOffset < 0) && + (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) + .ule(abs64(NewF.BaseOffset))) continue; // OK, looks good. @@ -3584,11 +3587,10 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (BaseReg != OrigReg) continue; Formula NewF = F; - NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm; - if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI)) { - if (!TLI || - !TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) + NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm; + if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, + LU.Kind, LU.AccessTy, NewF)) { + if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm)) continue; NewF = F; NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm; @@ -3602,11 +3604,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end(); J != JE; ++J) if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J)) - if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt( - abs64(NewF.AM.BaseOffs)) && + if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( + abs64(NewF.BaseOffset)) && (C->getValue()->getValue() + - NewF.AM.BaseOffs).countTrailingZeros() >= - CountTrailingZeros_64(NewF.AM.BaseOffs)) + NewF.BaseOffset).countTrailingZeros() >= + CountTrailingZeros_64(NewF.BaseOffset)) goto skip_formula; // Ok, looks good. @@ -3667,7 +3669,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // Collect the best formula for each unique set of shared registers. This // is reset for each use. - typedef DenseMap<SmallVector<const SCEV *, 2>, size_t, UniquifierDenseMapInfo> + typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo> BestFormulaeTy; BestFormulaeTy BestFormulae; @@ -3702,7 +3704,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { dbgs() << "\n"); } else { - SmallVector<const SCEV *, 2> Key; + SmallVector<const SCEV *, 4> Key; for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(), JE = F.BaseRegs.end(); J != JE; ++J) { const SCEV *Reg = *J; @@ -3804,7 +3806,7 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) { if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) { Formula NewF = F; - NewF.AM.BaseOffs += C->getValue()->getSExtValue(); + NewF.BaseOffset += C->getValue()->getSExtValue(); NewF.BaseRegs.erase(NewF.BaseRegs.begin() + (I - F.BaseRegs.begin())); if (LU.HasFormulaWithSameRegs(NewF)) { @@ -3817,9 +3819,9 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { } } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) { if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) - if (!F.AM.BaseGV) { + if (!F.BaseGV) { Formula NewF = F; - NewF.AM.BaseGV = GV; + NewF.BaseGV = GV; NewF.BaseRegs.erase(NewF.BaseRegs.begin() + (I - F.BaseRegs.begin())); if (LU.HasFormulaWithSameRegs(NewF)) { @@ -3848,84 +3850,83 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { /// for expressions like A, A+1, A+2, etc., allocate a single register for /// them. 
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { - if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { - DEBUG(dbgs() << "The search space is too complex.\n"); + if (EstimateSearchSpaceComplexity() < ComplexityLimit) + return; - DEBUG(dbgs() << "Narrowing the search space by assuming that uses " - "separated by a constant offset will use the same " - "registers.\n"); + DEBUG(dbgs() << "The search space is too complex.\n" + "Narrowing the search space by assuming that uses separated " + "by a constant offset will use the same registers.\n"); - // This is especially useful for unrolled loops. + // This is especially useful for unrolled loops. - for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { - LSRUse &LU = Uses[LUIdx]; - for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), - E = LU.Formulae.end(); I != E; ++I) { - const Formula &F = *I; - if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) { - if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) { - if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs, - /*HasBaseReg=*/false, - LU.Kind, LU.AccessTy)) { - DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); - dbgs() << '\n'); - - LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; - - // Update the relocs to reference the new use. - for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), - E = Fixups.end(); I != E; ++I) { - LSRFixup &Fixup = *I; - if (Fixup.LUIdx == LUIdx) { - Fixup.LUIdx = LUThatHas - &Uses.front(); - Fixup.Offset += F.AM.BaseOffs; - // Add the new offset to LUThatHas' offset list. - if (LUThatHas->Offsets.back() != Fixup.Offset) { - LUThatHas->Offsets.push_back(Fixup.Offset); - if (Fixup.Offset > LUThatHas->MaxOffset) - LUThatHas->MaxOffset = Fixup.Offset; - if (Fixup.Offset < LUThatHas->MinOffset) - LUThatHas->MinOffset = Fixup.Offset; - } - DEBUG(dbgs() << "New fixup has offset " - << Fixup.Offset << '\n'); - } - if (Fixup.LUIdx == NumUses-1) - Fixup.LUIdx = LUIdx; - } + for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { + LSRUse &LU = Uses[LUIdx]; + for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), + E = LU.Formulae.end(); I != E; ++I) { + const Formula &F = *I; + if (F.BaseOffset == 0 || F.Scale != 0) + continue; - // Delete formulae from the new use which are no longer legal. - bool Any = false; - for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { - Formula &F = LUThatHas->Formulae[i]; - if (!isLegalUse(F.AM, - LUThatHas->MinOffset, LUThatHas->MaxOffset, - LUThatHas->Kind, LUThatHas->AccessTy, TLI)) { - DEBUG(dbgs() << " Deleting "; F.print(dbgs()); - dbgs() << '\n'); - LUThatHas->DeleteFormula(F); - --i; - --e; - Any = true; - } - } - if (Any) - LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); + if (!LUThatHas) + continue; - // Delete the old use. - DeleteUse(LU, LUIdx); - --LUIdx; - --NumUses; - break; - } + if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, + LU.Kind, LU.AccessTy)) + continue; + + DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); + + LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; + + // Update the relocs to reference the new use. 
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), + E = Fixups.end(); I != E; ++I) { + LSRFixup &Fixup = *I; + if (Fixup.LUIdx == LUIdx) { + Fixup.LUIdx = LUThatHas - &Uses.front(); + Fixup.Offset += F.BaseOffset; + // Add the new offset to LUThatHas' offset list. + if (LUThatHas->Offsets.back() != Fixup.Offset) { + LUThatHas->Offsets.push_back(Fixup.Offset); + if (Fixup.Offset > LUThatHas->MaxOffset) + LUThatHas->MaxOffset = Fixup.Offset; + if (Fixup.Offset < LUThatHas->MinOffset) + LUThatHas->MinOffset = Fixup.Offset; } + DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); } + if (Fixup.LUIdx == NumUses-1) + Fixup.LUIdx = LUIdx; } - } - DEBUG(dbgs() << "After pre-selection:\n"; - print_uses(dbgs())); + // Delete formulae from the new use which are no longer legal. + bool Any = false; + for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { + Formula &F = LUThatHas->Formulae[i]; + if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, + LUThatHas->Kind, LUThatHas->AccessTy, F)) { + DEBUG(dbgs() << " Deleting "; F.print(dbgs()); + dbgs() << '\n'); + LUThatHas->DeleteFormula(F); + --i; + --e; + Any = true; + } + } + + if (Any) + LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); + + // Delete the old use. + DeleteUse(LU, LUIdx); + --LUIdx; + --NumUses; + break; + } } + + DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } /// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call @@ -4308,7 +4309,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand the ScaledReg portion. Value *ICmpScaledV = 0; - if (F.AM.Scale != 0) { + if (F.Scale != 0) { const SCEV *ScaledS = F.ScaledReg; // If we're expanding for a post-inc user, make the post-inc adjustment. @@ -4321,7 +4322,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. - assert(F.AM.Scale == -1 && + assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); } else { @@ -4336,20 +4337,20 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); ScaledS = SE.getMulExpr(ScaledS, - SE.getConstant(ScaledS->getType(), F.AM.Scale)); + SE.getConstant(ScaledS->getType(), F.Scale)); Ops.push_back(ScaledS); } } // Expand the GV portion. - if (F.AM.BaseGV) { + if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - Ops.push_back(SE.getUnknown(F.AM.BaseGV)); + Ops.push_back(SE.getUnknown(F.BaseGV)); } // Flush the operand list to suppress SCEVExpander hoisting of both folded and @@ -4361,7 +4362,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } // Expand the immediate portion. 
- int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset; + int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset; if (Offset != 0) { if (LU.Kind == LSRUse::ICmpZero) { // The other interesting way of "folding" with an ICmpZero is to use a @@ -4402,9 +4403,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (LU.Kind == LSRUse::ICmpZero) { ICmpInst *CI = cast<ICmpInst>(LF.UserInst); DeadInsts.push_back(CI->getOperand(1)); - assert(!F.AM.BaseGV && "ICmp does not support folding a global value and " + assert(!F.BaseGV && "ICmp does not support folding a global value and " "a scale at the same time!"); - if (F.AM.Scale == -1) { + if (F.Scale == -1) { if (ICmpScaledV->getType() != OpTy) { Instruction *Cast = CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, @@ -4414,7 +4415,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, } CI->setOperand(1, ICmpScaledV); } else { - assert(F.AM.Scale == 0 && + assert(F.Scale == 0 && "ICmp does not support folding a global value and " "a scale at the same time!"); Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), @@ -4589,13 +4590,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) - : IU(P->getAnalysis<IVUsers>()), - SE(P->getAnalysis<ScalarEvolution>()), - DT(P->getAnalysis<DominatorTree>()), - LI(P->getAnalysis<LoopInfo>()), - TLI(tli), L(l), Changed(false), IVIncInsertPos(0) { - +LSRInstance::LSRInstance(Loop *L, Pass *P) + : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), + DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()), + TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false), + IVIncInsertPos(0) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -4678,14 +4677,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P) #ifndef NDEBUG // Formulae should be legal. - for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), - E = Uses.end(); I != E; ++I) { - const LSRUse &LU = *I; - for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), - JE = LU.Formulae.end(); J != JE; ++J) - assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset, - LU.Kind, LU.AccessTy, TLI) && - "Illegal formula generated!"); + for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); + I != E; ++I) { + const LSRUse &LU = *I; + for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), + JE = LU.Formulae.end(); + J != JE; ++J) + assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, + *J) && "Illegal formula generated!"); }; #endif @@ -4757,13 +4756,9 @@ void LSRInstance::dump() const { namespace { class LoopStrengthReduce : public LoopPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. 
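As the hunks just below show, the pass itself no longer carries a TargetLowering pointer: createLoopStrengthReducePass() loses its parameter, and the pass declares a TargetTransformInfo requirement instead. A sketch of what client code looks like after the change (hypothetical helper name; assumes only the factory function shown below):

#include "llvm/PassManager.h"
#include "llvm/Transforms/Scalar.h"

// Sketch only -- not part of the patch. addLateLoopPasses is a hypothetical
// helper; note that no TargetLowering* is passed to LSR any more.
static void addLateLoopPasses(llvm::PassManager &PM) {
  PM.add(llvm::createLoopStrengthReducePass());
}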
- const TargetLowering *const TLI; - public: static char ID; // Pass ID, replacement for typeid - explicit LoopStrengthReduce(const TargetLowering *tli = 0); + LoopStrengthReduce(); private: bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -4775,6 +4770,7 @@ private: char LoopStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_DEPENDENCY(IVUsers) @@ -4784,14 +4780,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) -Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) { - return new LoopStrengthReduce(TLI); +Pass *llvm::createLoopStrengthReducePass() { + return new LoopStrengthReduce(); } -LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli) - : LoopPass(ID), TLI(tli) { - initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); - } +LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { + initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); +} void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { // We split critical edges, so we change the CFG. However, we do update @@ -4810,24 +4805,27 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); AU.addRequired<IVUsers>(); AU.addPreserved<IVUsers>(); + AU.addRequired<TargetTransformInfo>(); } bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { bool Changed = false; // Run the main LSR transformation. - Changed |= LSRInstance(TLI, L, this).getChanged(); + Changed |= LSRInstance(L, this).getChanged(); // Remove any extra phis created by processing inner loops. Changed |= DeleteDeadPHIs(L->getHeader()); - if (EnablePhiElim) { + if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector<WeakVH, 16> DeadInsts; SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif - unsigned numFolded = Rewriter. - replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI); + unsigned numFolded = + Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), + DeadInsts, + &getAnalysis<TargetTransformInfo>()); if (numFolded) { Changed = true; DeleteTriviallyDeadInstructions(DeadInsts); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 2b15528411..80d060b926 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -17,8 +17,9 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/DataLayout.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -90,6 +91,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addRequired<ScalarEvolution>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. 
@@ -101,6 +103,7 @@ namespace { char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -113,12 +116,14 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - const DataLayout *TD) { + bool &NotDuplicatable, + const TargetTransformInfo &TTI) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I, TD); + Metrics.analyzeBasicBlock(*I, TTI); NumCalls = Metrics.NumInlineCandidates; + NotDuplicatable = Metrics.notDuplicatable; unsigned LoopSize = Metrics.NumInsts; @@ -133,6 +138,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { LoopInfo *LI = &getAnalysis<LoopInfo>(); ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); + const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -145,8 +151,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // not user specified. unsigned Threshold = CurrentThreshold; if (!UserThreshold && - Header->getParent()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize)) + Header->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) Threshold = OptSizeUnrollThreshold; // Find trip count and trip multiple if count is not available @@ -179,10 +186,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Enforce the threshold. if (Threshold != NoThreshold) { - const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); unsigned NumInlineCandidates; - unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD); + bool notDuplicatable; + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, + notDuplicatable, TTI); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (notDuplicatable) { + DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable" + << " instructions.\n"); + return false; + } if (NumInlineCandidates != 0) { DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); return false; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index d41da4a9a9..0e8199f2fd 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -37,10 +37,11 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -101,7 +102,7 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. 
- bool countLoop(const Loop* L); + bool countLoop(const Loop* L, const TargetTransformInfo &TTI); // Clean all data related to given loop. void forgetLoop(const Loop* L); @@ -170,6 +171,7 @@ namespace { AU.addPreservedID(LCSSAID); AU.addPreserved<DominatorTree>(); AU.addPreserved<ScalarEvolution>(); + AU.addRequired<TargetTransformInfo>(); } private: @@ -221,7 +223,7 @@ namespace { // Analyze loop. Check its size, calculate is it possible to unswitch // it. Returns true if we can unswitch this loop. -bool LUAnalysisCache::countLoop(const Loop* L) { +bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) { std::pair<LoopPropsMapIt, bool> InsertRes = LoopsProperties.insert(std::make_pair(L, LoopProperties())); @@ -243,11 +245,18 @@ bool LUAnalysisCache::countLoop(const Loop* L) { for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) - Metrics.analyzeBasicBlock(*I); + Metrics.analyzeBasicBlock(*I, TTI); Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; + + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "NOT unswitching loop %" + << L->getHeader()->getName() << ", contents cannot be " + << "duplicated!\n"); + return false; + } } if (!Props.CanBeUnswitchedCount) { @@ -327,6 +336,7 @@ void LUAnalysisCache::cloneData(const Loop* NewLoop, const Loop* OldLoop, char LoopUnswitch::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -417,7 +427,7 @@ bool LoopUnswitch::processCurrentLoop() { // Probably we reach the quota of branches for this loop. If so // stop unswitching. - if (!BranchesInfo.countLoop(currentLoop)) + if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>())) return false; // Loop over all of the basic blocks in the loop. If we find an interior @@ -639,7 +649,8 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // Do not do non-trivial unswitch while optimizing for size. 
if (OptimizeForSize || - F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) + F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) return false; UnswitchNontrivialCondition(LoopCond, Val, currentLoop); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 7419a6543e..8ced4946c8 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "loweratomic" #include "llvm/Transforms/Scalar.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 26b6269f42..be0f0e8a25 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -20,11 +20,11 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp deleted file mode 100644 index ce397658bf..0000000000 --- a/lib/Transforms/Scalar/ObjCARC.cpp +++ /dev/null @@ -1,4232 +0,0 @@ -//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines ObjC ARC optimizations. ARC stands for -// Automatic Reference Counting and is a system for managing reference counts -// for objects in Objective C. -// -// The optimizations performed include elimination of redundant, partially -// redundant, and inconsequential reference count operations, elimination of -// redundant weak pointer operations, pattern-matching and replacement of -// low-level operations into higher-level operations, and numerous minor -// simplifications. -// -// This file also defines a simple ARC-aware AliasAnalysis. -// -// WARNING: This file knows about certain library functions. It recognizes them -// by name, and hardwires knowledge of their semantics. -// -// WARNING: This file knows about how certain Objective-C library functions are -// used. Naive LLVM IR transformations which would otherwise be -// behavior-preserving may break these assumptions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "objc-arc" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -// A handy option to enable/disable all optimizations in this file. 
-static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true)); - -//===----------------------------------------------------------------------===// -// Misc. Utilities -//===----------------------------------------------------------------------===// - -namespace { - /// MapVector - An associative container with fast insertion-order - /// (deterministic) iteration over its elements. Plus the special - /// blot operation. - template<class KeyT, class ValueT> - class MapVector { - /// Map - Map keys to indices in Vector. - typedef DenseMap<KeyT, size_t> MapTy; - MapTy Map; - - /// Vector - Keys and values. - typedef std::vector<std::pair<KeyT, ValueT> > VectorTy; - VectorTy Vector; - - public: - typedef typename VectorTy::iterator iterator; - typedef typename VectorTy::const_iterator const_iterator; - iterator begin() { return Vector.begin(); } - iterator end() { return Vector.end(); } - const_iterator begin() const { return Vector.begin(); } - const_iterator end() const { return Vector.end(); } - -#ifdef XDEBUG - ~MapVector() { - assert(Vector.size() >= Map.size()); // May differ due to blotting. - for (typename MapTy::const_iterator I = Map.begin(), E = Map.end(); - I != E; ++I) { - assert(I->second < Vector.size()); - assert(Vector[I->second].first == I->first); - } - for (typename VectorTy::const_iterator I = Vector.begin(), - E = Vector.end(); I != E; ++I) - assert(!I->first || - (Map.count(I->first) && - Map[I->first] == size_t(I - Vector.begin()))); - } -#endif - - ValueT &operator[](const KeyT &Arg) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(Arg, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(std::make_pair(Arg, ValueT())); - return Vector[Num].second; - } - return Vector[Pair.first->second].second; - } - - std::pair<iterator, bool> - insert(const std::pair<KeyT, ValueT> &InsertPair) { - std::pair<typename MapTy::iterator, bool> Pair = - Map.insert(std::make_pair(InsertPair.first, size_t(0))); - if (Pair.second) { - size_t Num = Vector.size(); - Pair.first->second = Num; - Vector.push_back(InsertPair); - return std::make_pair(Vector.begin() + Num, true); - } - return std::make_pair(Vector.begin() + Pair.first->second, false); - } - - const_iterator find(const KeyT &Key) const { - typename MapTy::const_iterator It = Map.find(Key); - if (It == Map.end()) return Vector.end(); - return Vector.begin() + It->second; - } - - /// blot - This is similar to erase, but instead of removing the element - /// from the vector, it just zeros out the key in the vector. This leaves - /// iterators intact, but clients must be prepared for zeroed-out keys when - /// iterating. - void blot(const KeyT &Key) { - typename MapTy::iterator It = Map.find(Key); - if (It == Map.end()) return; - Vector[It->second].first = KeyT(); - Map.erase(It); - } - - void clear() { - Map.clear(); - Vector.clear(); - } - }; -} - -//===----------------------------------------------------------------------===// -// ARC Utilities. -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/StringSwitch.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Transforms/Utils/Local.h" - -namespace { - /// InstructionClass - A simple classification for instructions. 
- enum InstructionClass { - IC_Retain, ///< objc_retain - IC_RetainRV, ///< objc_retainAutoreleasedReturnValue - IC_RetainBlock, ///< objc_retainBlock - IC_Release, ///< objc_release - IC_Autorelease, ///< objc_autorelease - IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue - IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush - IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop - IC_NoopCast, ///< objc_retainedObject, etc. - IC_FusedRetainAutorelease, ///< objc_retainAutorelease - IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue - IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive) - IC_StoreWeak, ///< objc_storeWeak (primitive) - IC_InitWeak, ///< objc_initWeak (derived) - IC_LoadWeak, ///< objc_loadWeak (derived) - IC_MoveWeak, ///< objc_moveWeak (derived) - IC_CopyWeak, ///< objc_copyWeak (derived) - IC_DestroyWeak, ///< objc_destroyWeak (derived) - IC_StoreStrong, ///< objc_storeStrong (derived) - IC_CallOrUser, ///< could call objc_release and/or "use" pointers - IC_Call, ///< could call objc_release - IC_User, ///< could "use" a pointer - IC_None ///< anything else - }; -} - -/// IsPotentialUse - Test whether the given value is possible a -/// reference-counted pointer. -static bool IsPotentialUse(const Value *Op) { - // Pointers to static or stack storage are not reference-counted pointers. - if (isa<Constant>(Op) || isa<AllocaInst>(Op)) - return false; - // Special arguments are not reference-counted. - if (const Argument *Arg = dyn_cast<Argument>(Op)) - if (Arg->hasByValAttr() || - Arg->hasNestAttr() || - Arg->hasStructRetAttr()) - return false; - // Only consider values with pointer types. - // It seemes intuitive to exclude function pointer types as well, since - // functions are never reference-counted, however clang occasionally - // bitcasts reference-counted pointers to function-pointer type - // temporarily. - PointerType *Ty = dyn_cast<PointerType>(Op->getType()); - if (!Ty) - return false; - // Conservatively assume anything else is a potential use. - return true; -} - -/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind -/// of construct CS is. -static InstructionClass GetCallSiteClass(ImmutableCallSite CS) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (IsPotentialUse(*I)) - return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser; - - return CS.onlyReadsMemory() ? IC_None : IC_Call; -} - -/// GetFunctionClass - Determine if F is one of the special known Functions. -/// If it isn't, return IC_CallOrUser. -static InstructionClass GetFunctionClass(const Function *F) { - Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - - // No arguments. - if (AI == AE) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush) - .Default(IC_CallOrUser); - - // One argument. - const Argument *A0 = AI++; - if (AI == AE) - // Argument is a pointer. - if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { - Type *ETy = PTy->getElementType(); - // Argument is i8*. 
- if (ETy->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_retain", IC_Retain) - .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV) - .Case("objc_retainBlock", IC_RetainBlock) - .Case("objc_release", IC_Release) - .Case("objc_autorelease", IC_Autorelease) - .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV) - .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop) - .Case("objc_retainedObject", IC_NoopCast) - .Case("objc_unretainedObject", IC_NoopCast) - .Case("objc_unretainedPointer", IC_NoopCast) - .Case("objc_retain_autorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutorelease", IC_FusedRetainAutorelease) - .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV) - .Default(IC_CallOrUser); - - // Argument is i8** - if (PointerType *Pte = dyn_cast<PointerType>(ETy)) - if (Pte->getElementType()->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_loadWeakRetained", IC_LoadWeakRetained) - .Case("objc_loadWeak", IC_LoadWeak) - .Case("objc_destroyWeak", IC_DestroyWeak) - .Default(IC_CallOrUser); - } - - // Two arguments, first is i8**. - const Argument *A1 = AI++; - if (AI == AE) - if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) - if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) - if (Pte->getElementType()->isIntegerTy(8)) - if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { - Type *ETy1 = PTy1->getElementType(); - // Second argument is i8* - if (ETy1->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_storeWeak", IC_StoreWeak) - .Case("objc_initWeak", IC_InitWeak) - .Case("objc_storeStrong", IC_StoreStrong) - .Default(IC_CallOrUser); - // Second argument is i8**. - if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) - if (Pte1->getElementType()->isIntegerTy(8)) - return StringSwitch<InstructionClass>(F->getName()) - .Case("objc_moveWeak", IC_MoveWeak) - .Case("objc_copyWeak", IC_CopyWeak) - .Default(IC_CallOrUser); - } - - // Anything else. - return IC_CallOrUser; -} - -/// GetInstructionClass - Determine what kind of construct V is. -static InstructionClass GetInstructionClass(const Value *V) { - if (const Instruction *I = dyn_cast<Instruction>(V)) { - // Any instruction other than bitcast and gep with a pointer operand have a - // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer - // to a subsequent use, rather than using it themselves, in this sense. - // As a short cut, several other opcodes are known to have no pointer - // operands of interest. And ret is never followed by a release, so it's - // not interesting to examine. - switch (I->getOpcode()) { - case Instruction::Call: { - const CallInst *CI = cast<CallInst>(I); - // Check for calls to special functions. - if (const Function *F = CI->getCalledFunction()) { - InstructionClass Class = GetFunctionClass(F); - if (Class != IC_CallOrUser) - return Class; - - // None of the intrinsic functions do objc_release. For intrinsics, the - // only question is whether or not they may be users. 
- switch (F->getIntrinsicID()) { - case Intrinsic::returnaddress: case Intrinsic::frameaddress: - case Intrinsic::stacksave: case Intrinsic::stackrestore: - case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend: - case Intrinsic::objectsize: case Intrinsic::prefetch: - case Intrinsic::stackprotector: - case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: - case Intrinsic::eh_typeid_for: case Intrinsic::eh_dwarf_cfa: - case Intrinsic::eh_sjlj_lsda: case Intrinsic::eh_sjlj_functioncontext: - case Intrinsic::init_trampoline: case Intrinsic::adjust_trampoline: - case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: case Intrinsic::invariant_end: - // Don't let dbg info affect our results. - case Intrinsic::dbg_declare: case Intrinsic::dbg_value: - // Short cut: Some intrinsics obviously don't use ObjC pointers. - return IC_None; - default: - break; - } - } - return GetCallSiteClass(CI); - } - case Instruction::Invoke: - return GetCallSiteClass(cast<InvokeInst>(I)); - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::Select: case Instruction::PHI: - case Instruction::Ret: case Instruction::Br: - case Instruction::Switch: case Instruction::IndirectBr: - case Instruction::Alloca: case Instruction::VAArg: - case Instruction::Add: case Instruction::FAdd: - case Instruction::Sub: case Instruction::FSub: - case Instruction::Mul: case Instruction::FMul: - case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: - case Instruction::SRem: case Instruction::URem: case Instruction::FRem: - case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc: - case Instruction::IntToPtr: case Instruction::FCmp: - case Instruction::FPTrunc: case Instruction::FPExt: - case Instruction::FPToUI: case Instruction::FPToSI: - case Instruction::UIToFP: case Instruction::SIToFP: - case Instruction::InsertElement: case Instruction::ExtractElement: - case Instruction::ShuffleVector: - case Instruction::ExtractValue: - break; - case Instruction::ICmp: - // Comparing a pointer with null, or any other constant, isn't an - // interesting use, because we don't care what the pointer points to, or - // about the values of any other dynamic reference-counted pointers. - if (IsPotentialUse(I->getOperand(1))) - return IC_User; - break; - default: - // For anything else, check all the operands. - // Note that this includes both operands of a Store: while the first - // operand isn't actually being dereferenced, it is being stored to - // memory where we can no longer track who might read it and dereference - // it, so we have to consider it potentially used. - for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); - OI != OE; ++OI) - if (IsPotentialUse(*OI)) - return IC_User; - } - } - - // Otherwise, it's totally inert for ARC purposes. - return IC_None; -} - -/// GetBasicInstructionClass - Determine what kind of construct V is. This is -/// similar to GetInstructionClass except that it only detects objc runtine -/// calls. This allows it to be faster. -static InstructionClass GetBasicInstructionClass(const Value *V) { - if (const CallInst *CI = dyn_cast<CallInst>(V)) { - if (const Function *F = CI->getCalledFunction()) - return GetFunctionClass(F); - // Otherwise, be conservative. - return IC_CallOrUser; - } - - // Otherwise, be conservative. 
- return isa<InvokeInst>(V) ? IC_CallOrUser : IC_User; -} - -/// IsRetain - Test if the given class is objc_retain or -/// equivalent. -static bool IsRetain(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV; -} - -/// IsAutorelease - Test if the given class is objc_autorelease or -/// equivalent. -static bool IsAutorelease(InstructionClass Class) { - return Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// IsForwarding - Test if the given class represents instructions which return -/// their argument verbatim. -static bool IsForwarding(InstructionClass Class) { - // objc_retainBlock technically doesn't always return its argument - // verbatim, but it doesn't matter for our purposes here. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock || - Class == IC_NoopCast; -} - -/// IsNoopOnNull - Test if the given class represents instructions which do -/// nothing if passed a null pointer. -static bool IsNoopOnNull(InstructionClass Class) { - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_RetainBlock; -} - -/// IsAlwaysTail - Test if the given class represents instructions which are -/// always safe to mark with the "tail" keyword. -static bool IsAlwaysTail(InstructionClass Class) { - // IC_RetainBlock may be given a stack argument. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV; -} - -/// IsNoThrow - Test if the given class represents instructions which are always -/// safe to mark with the nounwind attribute.. -static bool IsNoThrow(InstructionClass Class) { - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - return Class == IC_Retain || - Class == IC_RetainRV || - Class == IC_Release || - Class == IC_Autorelease || - Class == IC_AutoreleaseRV || - Class == IC_AutoreleasepoolPush || - Class == IC_AutoreleasepoolPop; -} - -/// EraseInstruction - Erase the given instruction. Many ObjC calls return their -/// argument verbatim, so if it's such a call and the return value has users, -/// replace them with the argument value. -static void EraseInstruction(Instruction *CI) { - Value *OldArg = cast<CallInst>(CI)->getArgOperand(0); - - bool Unused = CI->use_empty(); - - if (!Unused) { - // Replace the return value with the argument. - assert(IsForwarding(GetBasicInstructionClass(CI)) && - "Can't delete non-forwarding instruction with users!"); - CI->replaceAllUsesWith(OldArg); - } - - CI->eraseFromParent(); - - if (Unused) - RecursivelyDeleteTriviallyDeadInstructions(OldArg); -} - -/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which -/// also knows how to look through objc_retain and objc_autorelease calls, which -/// we know to return their argument verbatim. -static const Value *GetUnderlyingObjCPtr(const Value *V) { - for (;;) { - V = GetUnderlyingObject(V); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - - return V; -} - -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. 
-static const Value *StripPointerCastsAndObjCCalls(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - return V; -} - -/// StripPointerCastsAndObjCCalls - This is a wrapper around -/// Value::stripPointerCasts which also knows how to look through objc_retain -/// and objc_autorelease calls, which we know to return their argument verbatim. -static Value *StripPointerCastsAndObjCCalls(Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicInstructionClass(V))) - break; - V = cast<CallInst>(V)->getArgOperand(0); - } - return V; -} - -/// GetObjCArg - Assuming the given instruction is one of the special calls such -/// as objc_retain or objc_release, return the argument value, stripped of no-op -/// casts and forwarding calls. -static Value *GetObjCArg(Value *Inst) { - return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0)); -} - -/// IsObjCIdentifiedObject - This is similar to AliasAnalysis' -/// isObjCIdentifiedObject, except that it uses special knowledge of -/// ObjC conventions... -static bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". - // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa<CallInst>(V) || isa<InvokeInst>(V) || - isa<Argument>(V) || isa<Constant>(V) || - isa<AllocaInst>(V)) - return true; - - if (const LoadInst *LI = dyn_cast<LoadInst>(V)) { - const Value *Pointer = - StripPointerCastsAndObjCCalls(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. - if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") || - Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") || - Name.startswith("\01L_OBJC_METH_VAR_NAME_") || - Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - } - } - - return false; -} - -/// FindSingleUseIdentifiedObject - This is similar to -/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value -/// with multiple uses. -static const Value *FindSingleUseIdentifiedObject(const Value *Arg) { - if (Arg->hasOneUse()) { - if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg)) - return FindSingleUseIdentifiedObject(BC->getOperand(0)); - if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg)) - if (GEP->hasAllZeroIndices()) - return FindSingleUseIdentifiedObject(GEP->getPointerOperand()); - if (IsForwarding(GetBasicInstructionClass(Arg))) - return FindSingleUseIdentifiedObject( - cast<CallInst>(Arg)->getArgOperand(0)); - if (!IsObjCIdentifiedObject(Arg)) - return 0; - return Arg; - } - - // If we found an identifiable object but it has multiple uses, but they are - // trivial uses, we can still consider this to be a single-use value. 
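As a minimal sketch of how the stripping helpers above get used (not from the deleted file; Inst is assumed to be an arbitrary Instruction* under inspection), GetObjCArg recovers the pointer whose reference count a runtime call actually manipulates:

    // Given IR along the lines of:
    //   %0 = bitcast %T* %obj to i8*
    //   %1 = tail call i8* @objc_retain(i8* %0)
    // GetObjCArg on the retain strips the bitcast and yields %obj.
    if (CallInst *Call = dyn_cast<CallInst>(Inst))
      if (GetBasicInstructionClass(Call) == IC_Retain) {
        Value *Obj = GetObjCArg(Call);   // stripped of casts and forwarding calls
        (void)Obj;                       // ... reason about Obj's provenance here ...
      }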
- if (IsObjCIdentifiedObject(Arg)) { - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ++UI) { - const User *U = *UI; - if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg) - return 0; - } - - return Arg; - } - - return 0; -} - -/// ModuleHasARC - Test if the given module looks interesting to run ARC -/// optimization on. -static bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer"); -} - -/// DoesObjCBlockEscape - Test whether the given pointer, which is an -/// Objective C block pointer, does not "escape". This differs from regular -/// escape analysis in that a use as an argument to a call is not considered -/// an escape. -static bool DoesObjCBlockEscape(const Value *BlockPtr) { - // Walk the def-use chains. - SmallVector<const Value *, 4> Worklist; - Worklist.push_back(BlockPtr); - do { - const Value *V = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - const User *UUser = *UI; - // Special - Use by a call (callee or argument) is not considered - // to be an escape. - switch (GetBasicInstructionClass(UUser)) { - case IC_StoreWeak: - case IC_InitWeak: - case IC_StoreStrong: - case IC_Autorelease: - case IC_AutoreleaseRV: - // These special functions make copies of their pointer arguments. - return true; - case IC_User: - case IC_None: - // Use by an instruction which copies the value is an escape if the - // result is an escape. - if (isa<BitCastInst>(UUser) || isa<GetElementPtrInst>(UUser) || - isa<PHINode>(UUser) || isa<SelectInst>(UUser)) { - Worklist.push_back(UUser); - continue; - } - // Use by a load is not an escape. - if (isa<LoadInst>(UUser)) - continue; - // Use by a store is not an escape if the use is the address. - if (const StoreInst *SI = dyn_cast<StoreInst>(UUser)) - if (V != SI->getValueOperand()) - continue; - break; - default: - // Regular calls and other stuff are not considered escapes. - continue; - } - // Otherwise, conservatively assume an escape. - return true; - } - } while (!Worklist.empty()); - - // No escapes found. - return false; -} - -//===----------------------------------------------------------------------===// -// ARC AliasAnalysis. -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" - -namespace { - /// ObjCARCAliasAnalysis - This is a simple alias analysis - /// implementation that uses knowledge of ARC constructs to answer queries. - /// - /// TODO: This class could be generalized to know about other ObjC-specific - /// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing - /// even though their offsets are dynamic. 
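DoesObjCBlockEscape above (and isStoredObjCPointer further down) both follow the same def-use worklist idiom: seed with the root value, keep walking users that merely forward the pointer, and conservatively give up on anything else. A generic sketch of that idiom, with an illustrative escape policy and assuming the usual Value/Instruction headers from this era of the tree:

    static bool onlyForwardedAndLoaded(const Value *Root) {
      SmallVector<const Value *, 8> Worklist;
      SmallPtrSet<const Value *, 16> Visited;
      Worklist.push_back(Root);
      Visited.insert(Root);
      do {
        const Value *V = Worklist.pop_back_val();
        for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
             UI != UE; ++UI) {
          const User *U = *UI;
          // Users which only forward the pointer: walk their uses as well.
          if (isa<BitCastInst>(U) || isa<GetElementPtrInst>(U) ||
              isa<PHINode>(U) || isa<SelectInst>(U)) {
            if (Visited.insert(U))        // insert() returned bool in this era
              Worklist.push_back(U);
            continue;
          }
          // Loads read through the pointer without capturing it.
          if (isa<LoadInst>(U))
            continue;
          return false;                   // anything else: assume the worst
        }
      } while (!Worklist.empty());
      return true;
    }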
- class ObjCARCAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - ObjCARCAliasAnalysis() : ImmutablePass(ID) { - initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - private: - virtual void initializePass() { - InitializeAliasAnalysis(this); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(const void *PI) { - if (PI == &AliasAnalysis::ID) - return static_cast<AliasAnalysis *>(this); - return this; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual AliasResult alias(const Location &LocA, const Location &LocB); - virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); - virtual ModRefBehavior getModRefBehavior(const Function *F); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const Location &Loc); - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); - }; -} // End of anonymous namespace - -// Register this pass... -char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasAnalysis::AliasResult -ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) { - if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making a - // precise alias query. - const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr); - const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr); - AliasResult Result = - AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag), - Location(SB, LocB.Size, LocB.TBAATag)); - if (Result != MayAlias) - return Result; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA); - const Value *UB = GetUnderlyingObjCPtr(SB); - if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(Location(UA), Location(UB)); - // We can't use MustAlias or PartialAlias results here because - // GetUnderlyingObjCPtr may return an offsetted pointer value. - if (Result == NoAlias) - return NoAlias; - } - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return MayAlias; -} - -bool -ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc, - bool OrLocal) { - if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - - // First, strip off no-ops, including ObjC-specific no-ops, and try making - // a precise alias query. 
- const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag), - OrLocal)) - return true; - - // If that failed, climb to the underlying object, including climbing through - // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S); - if (U != S) - return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal); - - // If that failed, fail. We don't need to chain here, since that's covered - // by the earlier precise query. - return false; -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. - return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); - - switch (GetFunctionClass(F)) { - case IC_NoopCast: - return DoesNotAccessMemory; - default: - break; - } - - return AliasAnalysis::getModRefBehavior(F); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) { - if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); - - switch (GetBasicInstructionClass(CS.getInstruction())) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_NoopCast: - case IC_AutoreleasepoolPush: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These functions don't access any memory visible to the compiler. - // Note that this doesn't include objc_retainBlock, because it updates - // pointers when it copies block data. - return NoModRef; - default: - break; - } - - return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); -} - -//===----------------------------------------------------------------------===// -// ARC expansion. -//===----------------------------------------------------------------------===// - -#include "llvm/Support/InstIterator.h" -#include "llvm/Transforms/Scalar.h" - -namespace { - /// ObjCARCExpand - Early ARC transformations. - class ObjCARCExpand : public FunctionPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - public: - static char ID; - ObjCARCExpand() : FunctionPass(ID) { - initializeObjCARCExpandPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCExpand::ID = 0; -INITIALIZE_PASS(ObjCARCExpand, - "objc-arc-expand", "ObjC ARC expansion", false, false) - -Pass *llvm::createObjCARCExpandPass() { - return new ObjCARCExpand(); -} - -void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -bool ObjCARCExpand::doInitialization(Module &M) { - Run = ModuleHasARC(M); - return false; -} - -bool ObjCARCExpand::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. 
- if (!Run) - return false; - - bool Changed = false; - - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - Instruction *Inst = &*I; - - switch (GetBasicInstructionClass(Inst)) { - case IC_Retain: - case IC_RetainRV: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - // These calls return their argument verbatim, as a low-level - // optimization. However, this makes high-level optimizations - // harder. Undo any uses of this optimization that the front-end - // emitted here. We'll redo them in the contract pass. - Changed = true; - Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0)); - break; - default: - break; - } - } - - return Changed; -} - -//===----------------------------------------------------------------------===// -// ARC autorelease pool elimination. -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Constants.h" - -namespace { - /// ObjCARCAPElim - Autorelease pool elimination. - class ObjCARCAPElim : public ModulePass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnModule(Module &M); - - static bool MayAutorelease(ImmutableCallSite CS, unsigned Depth = 0); - static bool OptimizeBB(BasicBlock *BB); - - public: - static char ID; - ObjCARCAPElim() : ModulePass(ID) { - initializeObjCARCAPElimPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCAPElim::ID = 0; -INITIALIZE_PASS(ObjCARCAPElim, - "objc-arc-apelim", - "ObjC ARC autorelease pool elimination", - false, false) - -Pass *llvm::createObjCARCAPElimPass() { - return new ObjCARCAPElim(); -} - -void ObjCARCAPElim::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} - -/// MayAutorelease - Interprocedurally determine if calls made by the -/// given call site can possibly produce autoreleases. -bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) { - if (const Function *Callee = CS.getCalledFunction()) { - if (Callee->isDeclaration() || Callee->mayBeOverridden()) - return true; - for (Function::const_iterator I = Callee->begin(), E = Callee->end(); - I != E; ++I) { - const BasicBlock *BB = I; - for (BasicBlock::const_iterator J = BB->begin(), F = BB->end(); - J != F; ++J) - if (ImmutableCallSite JCS = ImmutableCallSite(J)) - // This recursion depth limit is arbitrary. It's just great - // enough to cover known interesting testcases. - if (Depth < 3 && - !JCS.onlyReadsMemory() && - MayAutorelease(JCS, Depth + 1)) - return true; - } - return false; - } - - return true; -} - -bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) { - bool Changed = false; - - Instruction *Push = 0; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPush: - Push = Inst; - break; - case IC_AutoreleasepoolPop: - // If this pop matches a push and nothing in between can autorelease, - // zap the pair. - if (Push && cast<CallInst>(Inst)->getArgOperand(0) == Push) { - Changed = true; - Inst->eraseFromParent(); - Push->eraseFromParent(); - } - Push = 0; - break; - case IC_CallOrUser: - if (MayAutorelease(ImmutableCallSite(Inst))) - Push = 0; - break; - default: - break; - } - } - - return Changed; -} - -bool ObjCARCAPElim::runOnModule(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. 
- if (!ModuleHasARC(M)) - return false; - - // Find the llvm.global_ctors variable, as the first step in - // identifying the global constructors. In theory, unnecessary autorelease - // pools could occur anywhere, but in practice it's pretty rare. Global - // ctors are a place where autorelease pools get inserted automatically, - // so it's pretty common for them to be unnecessary, and it's pretty - // profitable to eliminate them. - GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); - if (!GV) - return false; - - assert(GV->hasDefinitiveInitializer() && - "llvm.global_ctors is uncooperative!"); - - bool Changed = false; - - // Dig the constructor functions out of GV's initializer. - ConstantArray *Init = cast<ConstantArray>(GV->getInitializer()); - for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end(); - OI != OE; ++OI) { - Value *Op = *OI; - // llvm.global_ctors is an array of pairs where the second members - // are constructor functions. - Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1)); - // If the user used a constructor function with the wrong signature and - // it got bitcasted or whatever, look the other way. - if (!F) - continue; - // Only look at function definitions. - if (F->isDeclaration()) - continue; - // Only look at functions with one basic block. - if (llvm::next(F->begin()) != F->end()) - continue; - // Ok, a single-block constructor function definition. Try to optimize it. - Changed |= OptimizeBB(F->begin()); - } - - return Changed; -} - -//===----------------------------------------------------------------------===// -// ARC optimization. -//===----------------------------------------------------------------------===// - -// TODO: On code like this: -// -// objc_retain(%x) -// stuff_that_cannot_release() -// objc_autorelease(%x) -// stuff_that_cannot_release() -// objc_retain(%x) -// stuff_that_cannot_release() -// objc_autorelease(%x) -// -// The second retain and autorelease can be deleted. - -// TODO: It should be possible to delete -// objc_autoreleasePoolPush and objc_autoreleasePoolPop -// pairs if nothing is actually autoreleased between them. Also, autorelease -// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code -// after inlining) can be turned into plain release calls. - -// TODO: Critical-edge splitting. If the optimial insertion point is -// a critical edge, the current algorithm has to fail, because it doesn't -// know how to split edges. It should be possible to make the optimizer -// think in terms of edges, rather than blocks, and then split critical -// edges on demand. - -// TODO: OptimizeSequences could generalized to be Interprocedural. - -// TODO: Recognize that a bunch of other objc runtime calls have -// non-escaping arguments and non-releasing arguments, and may be -// non-autoreleasing. - -// TODO: Sink autorelease calls as far as possible. Unfortunately we -// usually can't sink them past other calls, which would be the main -// case where it would be useful. - -// TODO: The pointer returned from objc_loadWeakRetained is retained. - -// TODO: Delete release+retain pairs (rare). 
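This file defines one analysis (objc-arc-aa) and three transform passes (objc-arc-expand, objc-arc-apelim, objc-arc), each exposed through a create*Pass() factory. A rough sketch of wiring the transforms into a legacy PassManager; the ordering here is illustrative rather than the canonical pipeline placement, and the declarations are assumed to come from llvm/Transforms/Scalar.h as in this era of the tree:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    static void addObjCARCPasses(PassManager &PM) {
      PM.add(createObjCARCExpandPass());   // undo front-end uses of forwarding calls
      PM.add(createObjCARCAPElimPass());   // strip trivially empty autorelease pool scopes
      PM.add(createObjCARCOptPass());      // the main retain/release optimizer
    }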
- -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/LLVMContext.h" -#include "llvm/Support/CFG.h" - -STATISTIC(NumNoops, "Number of no-op objc calls eliminated"); -STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated"); -STATISTIC(NumAutoreleases,"Number of autoreleases converted to releases"); -STATISTIC(NumRets, "Number of return value forwarding " - "retain+autoreleaes eliminated"); -STATISTIC(NumRRs, "Number of retain+release paths eliminated"); -STATISTIC(NumPeeps, "Number of calls peephole-optimized"); - -namespace { - /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it - /// uses many of the same techniques, except it uses special ObjC-specific - /// reasoning about pointer relationships. - class ProvenanceAnalysis { - AliasAnalysis *AA; - - typedef std::pair<const Value *, const Value *> ValuePairTy; - typedef DenseMap<ValuePairTy, bool> CachedResultsTy; - CachedResultsTy CachedResults; - - bool relatedCheck(const Value *A, const Value *B); - bool relatedSelect(const SelectInst *A, const Value *B); - bool relatedPHI(const PHINode *A, const Value *B); - - void operator=(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - ProvenanceAnalysis(const ProvenanceAnalysis &) LLVM_DELETED_FUNCTION; - - public: - ProvenanceAnalysis() {} - - void setAA(AliasAnalysis *aa) { AA = aa; } - - AliasAnalysis *getAA() const { return AA; } - - bool related(const Value *A, const Value *B); - - void clear() { - CachedResults.clear(); - } - }; -} - -bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) { - // If the values are Selects with the same condition, we can do a more precise - // check: just check for relations between the values on corresponding arms. - if (const SelectInst *SB = dyn_cast<SelectInst>(B)) - if (A->getCondition() == SB->getCondition()) - return related(A->getTrueValue(), SB->getTrueValue()) || - related(A->getFalseValue(), SB->getFalseValue()); - - // Check both arms of the Select node individually. - return related(A->getTrueValue(), B) || - related(A->getFalseValue(), B); -} - -bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) { - // If the values are PHIs in the same block, we can do a more precise as well - // as efficient check: just check for relations between the values on - // corresponding edges. - if (const PHINode *PNB = dyn_cast<PHINode>(B)) - if (PNB->getParent() == A->getParent()) { - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) - if (related(A->getIncomingValue(i), - PNB->getIncomingValueForBlock(A->getIncomingBlock(i)))) - return true; - return false; - } - - // Check each unique source of the PHI node against B. - SmallPtrSet<const Value *, 4> UniqueSrc; - for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) { - const Value *PV1 = A->getIncomingValue(i); - if (UniqueSrc.insert(PV1) && related(PV1, B)) - return true; - } - - // All of the arms checked out. - return false; -} - -/// isStoredObjCPointer - Test if the value of P, or any value covered by its -/// provenance, is ever stored within the function (not counting callees). 
-static bool isStoredObjCPointer(const Value *P) { - SmallPtrSet<const Value *, 8> Visited; - SmallVector<const Value *, 8> Worklist; - Worklist.push_back(P); - Visited.insert(P); - do { - P = Worklist.pop_back_val(); - for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end(); - UI != UE; ++UI) { - const User *Ur = *UI; - if (isa<StoreInst>(Ur)) { - if (UI.getOperandNo() == 0) - // The pointer is stored. - return true; - // The pointed is stored through. - continue; - } - if (isa<CallInst>(Ur)) - // The pointer is passed as an argument, ignore this. - continue; - if (isa<PtrToIntInst>(P)) - // Assume the worst. - return true; - if (Visited.insert(Ur)) - Worklist.push_back(Ur); - } - } while (!Worklist.empty()); - - // Everything checked out. - return false; -} - -bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) { - // Skip past provenance pass-throughs. - A = GetUnderlyingObjCPtr(A); - B = GetUnderlyingObjCPtr(B); - - // Quick check. - if (A == B) - return true; - - // Ask regular AliasAnalysis, for a first approximation. - switch (AA->alias(A, B)) { - case AliasAnalysis::NoAlias: - return false; - case AliasAnalysis::MustAlias: - case AliasAnalysis::PartialAlias: - return true; - case AliasAnalysis::MayAlias: - break; - } - - bool AIsIdentified = IsObjCIdentifiedObject(A); - bool BIsIdentified = IsObjCIdentifiedObject(B); - - // An ObjC-Identified object can't alias a load if it is never locally stored. - if (AIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(B)) - return isStoredObjCPointer(A); - if (BIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(A)) - return isStoredObjCPointer(B); - // Both pointers are identified and escapes aren't an evident problem. - return false; - } - } else if (BIsIdentified) { - // Check for an obvious escape. - if (isa<LoadInst>(A)) - return isStoredObjCPointer(B); - } - - // Special handling for PHI and Select. - if (const PHINode *PN = dyn_cast<PHINode>(A)) - return relatedPHI(PN, B); - if (const PHINode *PN = dyn_cast<PHINode>(B)) - return relatedPHI(PN, A); - if (const SelectInst *S = dyn_cast<SelectInst>(A)) - return relatedSelect(S, B); - if (const SelectInst *S = dyn_cast<SelectInst>(B)) - return relatedSelect(S, A); - - // Conservative. - return true; -} - -bool ProvenanceAnalysis::related(const Value *A, const Value *B) { - // Begin by inserting a conservative value into the map. If the insertion - // fails, we have the answer already. If it succeeds, leave it there until we - // compute the real answer to guard against recursive queries. - if (A > B) std::swap(A, B); - std::pair<CachedResultsTy::iterator, bool> Pair = - CachedResults.insert(std::make_pair(ValuePairTy(A, B), true)); - if (!Pair.second) - return Pair.first->second; - - bool Result = relatedCheck(A, B); - CachedResults[ValuePairTy(A, B)] = Result; - return Result; -} - -namespace { - // Sequence - A sequence of states that a pointer may go through in which an - // objc_retain and objc_release are actually needed. - enum Sequence { - S_None, - S_Retain, ///< objc_retain(x) - S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement - S_Use, ///< any use of x - S_Stop, ///< like S_Release, but code motion is stopped - S_Release, ///< objc_release(x) - S_MovableRelease ///< objc_release(x), !clang.imprecise_release - }; -} - -static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) { - // The easy cases. 
- if (A == B) - return A; - if (A == S_None || B == S_None) - return S_None; - - if (A > B) std::swap(A, B); - if (TopDown) { - // Choose the side which is further along in the sequence. - if ((A == S_Retain || A == S_CanRelease) && - (B == S_CanRelease || B == S_Use)) - return B; - } else { - // Choose the side which is further along in the sequence. - if ((A == S_Use || A == S_CanRelease) && - (B == S_Use || B == S_Release || B == S_Stop || B == S_MovableRelease)) - return A; - // If both sides are releases, choose the more conservative one. - if (A == S_Stop && (B == S_Release || B == S_MovableRelease)) - return A; - if (A == S_Release && B == S_MovableRelease) - return A; - } - - return S_None; -} - -namespace { - /// RRInfo - Unidirectional information about either a - /// retain-decrement-use-release sequence or release-use-decrement-retain - /// reverese sequence. - struct RRInfo { - /// KnownSafe - After an objc_retain, the reference count of the referenced - /// object is known to be positive. Similarly, before an objc_release, the - /// reference count of the referenced object is known to be positive. If - /// there are retain-release pairs in code regions where the retain count - /// is known to be positive, they can be eliminated, regardless of any side - /// effects between them. - /// - /// Also, a retain+release pair nested within another retain+release - /// pair all on the known same pointer value can be eliminated, regardless - /// of any intervening side effects. - /// - /// KnownSafe is true when either of these conditions is satisfied. - bool KnownSafe; - - /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as - /// opposed to objc_retain calls). - bool IsRetainBlock; - - /// IsTailCallRelease - True of the objc_release calls are all marked - /// with the "tail" keyword. - bool IsTailCallRelease; - - /// ReleaseMetadata - If the Calls are objc_release calls and they all have - /// a clang.imprecise_release tag, this is the metadata tag. - MDNode *ReleaseMetadata; - - /// Calls - For a top-down sequence, the set of objc_retains or - /// objc_retainBlocks. For bottom-up, the set of objc_releases. - SmallPtrSet<Instruction *, 2> Calls; - - /// ReverseInsertPts - The set of optimal insert positions for - /// moving calls in the opposite sequence. - SmallPtrSet<Instruction *, 2> ReverseInsertPts; - - RRInfo() : - KnownSafe(false), IsRetainBlock(false), - IsTailCallRelease(false), - ReleaseMetadata(0) {} - - void clear(); - }; -} - -void RRInfo::clear() { - KnownSafe = false; - IsRetainBlock = false; - IsTailCallRelease = false; - ReleaseMetadata = 0; - Calls.clear(); - ReverseInsertPts.clear(); -} - -namespace { - /// PtrState - This class summarizes several per-pointer runtime properties - /// which are propogated through the flow graph. - class PtrState { - /// KnownPositiveRefCount - True if the reference count is known to - /// be incremented. - bool KnownPositiveRefCount; - - /// Partial - True of we've seen an opportunity for partial RR elimination, - /// such as pushing calls into a CFG triangle or into one side of a - /// CFG diamond. - bool Partial; - - /// Seq - The current position in the sequence. - Sequence Seq : 8; - - public: - /// RRI - Unidirectional information about the current sequence. - /// TODO: Encapsulate this better. 
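A few concrete points of the MergeSeqs lattice above, written as assertions over the Sequence values it is defined on (a sketch, assuming it sits in the same translation unit): each direction keeps whichever compatible state is further along in that direction's sequence, release flavors merge to the more conservative one, and anything the rules cannot reconcile collapses to S_None.

    #include <cassert>

    static void mergeSeqsExamples() {
      assert(MergeSeqs(S_Retain, S_Use,            /*TopDown=*/true)  == S_Use);
      assert(MergeSeqs(S_Use,    S_Release,        /*TopDown=*/false) == S_Use);
      assert(MergeSeqs(S_Stop,   S_MovableRelease, /*TopDown=*/false) == S_Stop);
      assert(MergeSeqs(S_Retain, S_Release,        /*TopDown=*/true)  == S_None);
    }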
- RRInfo RRI; - - PtrState() : KnownPositiveRefCount(false), Partial(false), - Seq(S_None) {} - - void SetKnownPositiveRefCount() { - KnownPositiveRefCount = true; - } - - void ClearRefCount() { - KnownPositiveRefCount = false; - } - - bool IsKnownIncremented() const { - return KnownPositiveRefCount; - } - - void SetSeq(Sequence NewSeq) { - Seq = NewSeq; - } - - Sequence GetSeq() const { - return Seq; - } - - void ClearSequenceProgress() { - ResetSequenceProgress(S_None); - } - - void ResetSequenceProgress(Sequence NewSeq) { - Seq = NewSeq; - Partial = false; - RRI.clear(); - } - - void Merge(const PtrState &Other, bool TopDown); - }; -} - -void -PtrState::Merge(const PtrState &Other, bool TopDown) { - Seq = MergeSeqs(Seq, Other.Seq, TopDown); - KnownPositiveRefCount = KnownPositiveRefCount && Other.KnownPositiveRefCount; - - // We can't merge a plain objc_retain with an objc_retainBlock. - if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock) - Seq = S_None; - - // If we're not in a sequence (anymore), drop all associated state. - if (Seq == S_None) { - Partial = false; - RRI.clear(); - } else if (Partial || Other.Partial) { - // If we're doing a merge on a path that's previously seen a partial - // merge, conservatively drop the sequence, to avoid doing partial - // RR elimination. If the branch predicates for the two merge differ, - // mixing them is unsafe. - ClearSequenceProgress(); - } else { - // Conservatively merge the ReleaseMetadata information. - if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata) - RRI.ReleaseMetadata = 0; - - RRI.KnownSafe = RRI.KnownSafe && Other.RRI.KnownSafe; - RRI.IsTailCallRelease = RRI.IsTailCallRelease && - Other.RRI.IsTailCallRelease; - RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end()); - - // Merge the insert point sets. If there are any differences, - // that makes this a partial merge. - Partial = RRI.ReverseInsertPts.size() != Other.RRI.ReverseInsertPts.size(); - for (SmallPtrSet<Instruction *, 2>::const_iterator - I = Other.RRI.ReverseInsertPts.begin(), - E = Other.RRI.ReverseInsertPts.end(); I != E; ++I) - Partial |= RRI.ReverseInsertPts.insert(*I); - } -} - -namespace { - /// BBState - Per-BasicBlock state. - class BBState { - /// TopDownPathCount - The number of unique control paths from the entry - /// which can reach this block. - unsigned TopDownPathCount; - - /// BottomUpPathCount - The number of unique control paths to exits - /// from this block. - unsigned BottomUpPathCount; - - /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp. - typedef MapVector<const Value *, PtrState> MapTy; - - /// PerPtrTopDown - The top-down traversal uses this to record information - /// known about a pointer at the bottom of each block. - MapTy PerPtrTopDown; - - /// PerPtrBottomUp - The bottom-up traversal uses this to record information - /// known about a pointer at the top of each block. - MapTy PerPtrBottomUp; - - /// Preds, Succs - Effective successors and predecessors of the current - /// block (this ignores ignorable edges and ignored backedges). 
- SmallVector<BasicBlock *, 2> Preds; - SmallVector<BasicBlock *, 2> Succs; - - public: - BBState() : TopDownPathCount(0), BottomUpPathCount(0) {} - - typedef MapTy::iterator ptr_iterator; - typedef MapTy::const_iterator ptr_const_iterator; - - ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); } - ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); } - ptr_const_iterator top_down_ptr_begin() const { - return PerPtrTopDown.begin(); - } - ptr_const_iterator top_down_ptr_end() const { - return PerPtrTopDown.end(); - } - - ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); } - ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); } - ptr_const_iterator bottom_up_ptr_begin() const { - return PerPtrBottomUp.begin(); - } - ptr_const_iterator bottom_up_ptr_end() const { - return PerPtrBottomUp.end(); - } - - /// SetAsEntry - Mark this block as being an entry block, which has one - /// path from the entry by definition. - void SetAsEntry() { TopDownPathCount = 1; } - - /// SetAsExit - Mark this block as being an exit block, which has one - /// path to an exit by definition. - void SetAsExit() { BottomUpPathCount = 1; } - - PtrState &getPtrTopDownState(const Value *Arg) { - return PerPtrTopDown[Arg]; - } - - PtrState &getPtrBottomUpState(const Value *Arg) { - return PerPtrBottomUp[Arg]; - } - - void clearBottomUpPointers() { - PerPtrBottomUp.clear(); - } - - void clearTopDownPointers() { - PerPtrTopDown.clear(); - } - - void InitFromPred(const BBState &Other); - void InitFromSucc(const BBState &Other); - void MergePred(const BBState &Other); - void MergeSucc(const BBState &Other); - - /// GetAllPathCount - Return the number of possible unique paths from an - /// entry to an exit which pass through this block. This is only valid - /// after both the top-down and bottom-up traversals are complete. - unsigned GetAllPathCount() const { - assert(TopDownPathCount != 0); - assert(BottomUpPathCount != 0); - return TopDownPathCount * BottomUpPathCount; - } - - // Specialized CFG utilities. - typedef SmallVectorImpl<BasicBlock *>::const_iterator edge_iterator; - edge_iterator pred_begin() { return Preds.begin(); } - edge_iterator pred_end() { return Preds.end(); } - edge_iterator succ_begin() { return Succs.begin(); } - edge_iterator succ_end() { return Succs.end(); } - - void addSucc(BasicBlock *Succ) { Succs.push_back(Succ); } - void addPred(BasicBlock *Pred) { Preds.push_back(Pred); } - - bool isExit() const { return Succs.empty(); } - }; -} - -void BBState::InitFromPred(const BBState &Other) { - PerPtrTopDown = Other.PerPtrTopDown; - TopDownPathCount = Other.TopDownPathCount; -} - -void BBState::InitFromSucc(const BBState &Other) { - PerPtrBottomUp = Other.PerPtrBottomUp; - BottomUpPathCount = Other.BottomUpPathCount; -} - -/// MergePred - The top-down traversal uses this to merge information about -/// predecessors to form the initial state for a new block. -void BBState::MergePred(const BBState &Other) { - // Other.TopDownPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - TopDownPathCount += Other.TopDownPathCount; - - // Check for overflow. If we have overflow, fall back to conservative behavior. - if (TopDownPathCount < Other.TopDownPathCount) { - clearTopDownPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the same key, - // merge the entries. Otherwise, copy the entry and merge it with an empty - // entry. 
- for (ptr_const_iterator MI = Other.top_down_ptr_begin(), - ME = Other.top_down_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, - /*TopDown=*/true); - } - - // For each entry in our set, if the other set doesn't have an entry with the - // same key, force it to merge with an empty entry. - for (ptr_iterator MI = top_down_ptr_begin(), - ME = top_down_ptr_end(); MI != ME; ++MI) - if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end()) - MI->second.Merge(PtrState(), /*TopDown=*/true); -} - -/// MergeSucc - The bottom-up traversal uses this to merge information about -/// successors to form the initial state for a new block. -void BBState::MergeSucc(const BBState &Other) { - // Other.BottomUpPathCount can be 0, in which case it is either dead or a - // loop backedge. Loop backedges are special. - BottomUpPathCount += Other.BottomUpPathCount; - - // Check for overflow. If we have overflow, fall back to conservative behavior. - if (BottomUpPathCount < Other.BottomUpPathCount) { - clearBottomUpPointers(); - return; - } - - // For each entry in the other set, if our set has an entry with the - // same key, merge the entries. Otherwise, copy the entry and merge - // it with an empty entry. - for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(), - ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) { - std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI); - Pair.first->second.Merge(Pair.second ? PtrState() : MI->second, - /*TopDown=*/false); - } - - // For each entry in our set, if the other set doesn't have an entry - // with the same key, force it to merge with an empty entry. - for (ptr_iterator MI = bottom_up_ptr_begin(), - ME = bottom_up_ptr_end(); MI != ME; ++MI) - if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end()) - MI->second.Merge(PtrState(), /*TopDown=*/false); -} - -namespace { - /// ObjCARCOpt - The main ARC optimization pass. - class ObjCARCOpt : public FunctionPass { - bool Changed; - ProvenanceAnalysis PA; - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - /// RetainRVCallee, etc. - Declarations for ObjC runtime - /// functions, for use in creating calls to them. These are initialized - /// lazily to avoid cluttering up the Module with unused declarations. - Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee, - *RetainCallee, *RetainBlockCallee, *AutoreleaseCallee; - - /// UsedInThisFunciton - Flags which determine whether each of the - /// interesting runtine functions is in fact used in the current function. - unsigned UsedInThisFunction; - - /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release - /// metadata. - unsigned ImpreciseReleaseMDKind; - - /// CopyOnEscapeMDKind - The Metadata Kind for clang.arc.copy_on_escape - /// metadata. - unsigned CopyOnEscapeMDKind; - - /// NoObjCARCExceptionsMDKind - The Metadata Kind for - /// clang.arc.no_objc_arc_exceptions metadata. 
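A small worked example of the path counts behind GetAllPathCount above, using an if/else diamond. This is a sketch assumed to live in the same translation unit as BBState; only the counters are exercised, the per-pointer maps stay empty, and merging into zero-initialized blocks yields the same counts as seeding the first predecessor or successor with InitFromPred/InitFromSucc.

    #include <cassert>

    static void pathCountExample() {
      BBState Entry, Then, Else, Exit;      // entry -> {then, else} -> exit
      Entry.SetAsEntry();                   // one path from the function entry
      Then.MergePred(Entry);
      Else.MergePred(Entry);
      Exit.MergePred(Then);
      Exit.MergePred(Else);                 // two distinct top-down paths reach Exit
      Exit.SetAsExit();                     // one path to an exit
      Then.MergeSucc(Exit);
      Else.MergeSucc(Exit);
      Entry.MergeSucc(Then);
      Entry.MergeSucc(Else);                // two distinct bottom-up paths leave Entry
      assert(Then.GetAllPathCount() == 1);  // entry->then->exit is the only such path
      assert(Exit.GetAllPathCount() == 2);  // every entry-to-exit path passes through Exit
    }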
- unsigned NoObjCARCExceptionsMDKind; - - Constant *getRetainRVCallee(Module *M); - Constant *getAutoreleaseRVCallee(Module *M); - Constant *getReleaseCallee(Module *M); - Constant *getRetainCallee(Module *M); - Constant *getRetainBlockCallee(Module *M); - Constant *getAutoreleaseCallee(Module *M); - - bool IsRetainBlockOptimizable(const Instruction *Inst); - - void OptimizeRetainCall(Function &F, Instruction *Retain); - bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV); - void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV); - void OptimizeIndividualCalls(Function &F); - - void CheckForCFGHazards(const BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - BBState &MyStates) const; - bool VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates); - bool VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains); - bool VisitInstructionTopDown(Instruction *Inst, - DenseMap<Value *, RRInfo> &Releases, - BBState &MyStates); - bool VisitTopDown(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - DenseMap<Value *, RRInfo> &Releases); - bool Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases); - - void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M); - - bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M); - - void OptimizeWeakCalls(Function &F); - - bool OptimizeSequences(Function &F); - - void OptimizeReturns(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - virtual void releaseMemory(); - - public: - static char ID; - ObjCARCOpt() : FunctionPass(ID) { - initializeObjCARCOptPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCOpt::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) -INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) -INITIALIZE_PASS_END(ObjCARCOpt, - "objc-arc", "ObjC ARC optimization", false, false) - -Pass *llvm::createObjCARCOptPass() { - return new ObjCARCOpt(); -} - -void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<ObjCARCAliasAnalysis>(); - AU.addRequired<AliasAnalysis>(); - // ARC optimization doesn't currently split critical edges. - AU.setPreservesCFG(); -} - -bool ObjCARCOpt::IsRetainBlockOptimizable(const Instruction *Inst) { - // Without the magic metadata tag, we have to assume this might be an - // objc_retainBlock call inserted to convert a block pointer to an id, - // in which case it really is needed. - if (!Inst->getMetadata(CopyOnEscapeMDKind)) - return false; - - // If the pointer "escapes" (not including being used in a call), - // the copy may be needed. - if (DoesObjCBlockEscape(Inst)) - return false; - - // Otherwise, it's not needed. 
- return true; -} - -Constant *ObjCARCOpt::getRetainRVCallee(Module *M) { - if (!RetainRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainRVCallee = - M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy, - Attributes); - } - return RetainRVCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) { - if (!AutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - AutoreleaseRVCallee = - M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy, - Attributes); - } - return AutoreleaseRVCallee; -} - -Constant *ObjCARCOpt::getReleaseCallee(Module *M) { - if (!ReleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - ReleaseCallee = - M->getOrInsertFunction( - "objc_release", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attributes); - } - return ReleaseCallee; -} - -Constant *ObjCARCOpt::getRetainCallee(Module *M) { - if (!RetainCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainCallee = - M->getOrInsertFunction( - "objc_retain", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attributes); - } - return RetainCallee; -} - -Constant *ObjCARCOpt::getRetainBlockCallee(Module *M) { - if (!RetainBlockCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - // objc_retainBlock is not nounwind because it calls user copy constructors - // which could theoretically throw. - RetainBlockCallee = - M->getOrInsertFunction( - "objc_retainBlock", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - AttributeSet()); - } - return RetainBlockCallee; -} - -Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) { - if (!AutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *Params[] = { PointerType::getUnqual(Type::getInt8Ty(C)) }; - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - AutoreleaseCallee = - M->getOrInsertFunction( - "objc_autorelease", - FunctionType::get(Params[0], Params, /*isVarArg=*/false), - Attributes); - } - return AutoreleaseCallee; -} - -/// IsPotentialUse - Test whether the given value is possible a -/// reference-counted pointer, including tests which utilize AliasAnalysis. -static bool IsPotentialUse(const Value *Op, AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialUse(Op)) - return false; - - // Objects in constant memory are not reference-counted. 
- if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// CanAlterRefCount - Test whether the given instruction can result in a -/// reference count modification (positive or negative) for the pointer's -/// object. -static bool -CanAlterRefCount(const Instruction *Inst, const Value *Ptr, - ProvenanceAnalysis &PA, InstructionClass Class) { - switch (Class) { - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_User: - // These operations never directly modify a reference count. - return false; - default: break; - } - - ImmutableCallSite CS = static_cast<const Value *>(Inst); - assert(CS && "Only calls can alter reference counts!"); - - // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); - if (AliasAnalysis::onlyReadsMemory(MRB)) - return false; - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) { - const Value *Op = *I; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } - - // Assume the worst. - return true; -} - -/// CanUse - Test whether the given instruction can "use" the given pointer's -/// object in a way that requires the reference count to be positive. -static bool -CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA, - InstructionClass Class) { - // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers. - if (Class == IC_Call) - return false; - - // Consider various instructions which may have pointer arguments which are - // not "uses". - if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) { - // Comparing a pointer with null, or any other constant, isn't really a use, - // because we don't care what the pointer points to, or about the values - // of any other dynamic reference-counted pointers. - if (!IsPotentialUse(ICI->getOperand(1), *PA.getAA())) - return false; - } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) { - // For calls, just check the arguments (and not the callee operand). - for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(), - OE = CS.arg_end(); OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; - } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - // Special-case stores, because we don't care about the stored value, just - // the store address. - const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand()); - // If we can't tell what the underlying object was, assume there is a - // dependence. - return IsPotentialUse(Op, *PA.getAA()) && PA.related(Op, Ptr); - } - - // Check each operand for a match. - for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); - OI != OE; ++OI) { - const Value *Op = *OI; - if (IsPotentialUse(Op, *PA.getAA()) && PA.related(Ptr, Op)) - return true; - } - return false; -} - -/// CanInterruptRV - Test whether the given instruction can autorelease -/// any pointer or cause an autoreleasepool pop. 
-static bool -CanInterruptRV(InstructionClass Class) { - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_CallOrUser: - case IC_Call: - case IC_Autorelease: - case IC_AutoreleaseRV: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - return true; - default: - return false; - } -} - -namespace { - /// DependenceKind - There are several kinds of dependence-like concepts in - /// use here. - enum DependenceKind { - NeedsPositiveRetainCount, - AutoreleasePoolBoundary, - CanChangeRetainCount, - RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease. - RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue. - RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue. - }; -} - -/// Depends - Test if there can be dependencies on Inst through Arg. This -/// function only tests dependencies relevant for removing pairs of calls. -static bool -Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg, - ProvenanceAnalysis &PA) { - // If we've reached the definition of Arg, stop. - if (Inst == Arg) - return true; - - switch (Flavor) { - case NeedsPositiveRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanUse(Inst, Arg, PA, Class); - } - } - - case AutoreleasePoolBoundary: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // These mark the end and begin of an autorelease pool scope. - return true; - default: - // Nothing else does this. - return false; - } - } - - case CanChangeRetainCount: { - InstructionClass Class = GetInstructionClass(Inst); - switch (Class) { - case IC_AutoreleasepoolPop: - // Conservatively assume this can decrement any count. - return true; - case IC_AutoreleasepoolPush: - case IC_None: - return false; - default: - return CanAlterRefCount(Inst, Arg, PA, Class); - } - } - - case RetainAutoreleaseDep: - switch (GetBasicInstructionClass(Inst)) { - case IC_AutoreleasepoolPop: - case IC_AutoreleasepoolPush: - // Don't merge an objc_autorelease with an objc_retain inside a different - // autoreleasepool scope. - return true; - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Nothing else matters for objc_retainAutorelease formation. - return false; - } - - case RetainAutoreleaseRVDep: { - InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_RetainRV: - // Check for a retain of the same pointer for merging. - return GetObjCArg(Inst) == Arg; - default: - // Anything that can autorelease interrupts - // retainAutoreleaseReturnValue formation. - return CanInterruptRV(Class); - } - } - - case RetainRVDep: - return CanInterruptRV(GetBasicInstructionClass(Inst)); - } - - llvm_unreachable("Invalid dependence flavor"); -} - -/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and -/// find local and non-local dependencies on Arg. -/// TODO: Cache results? 
-static void -FindDependencies(DependenceKind Flavor, - const Value *Arg, - BasicBlock *StartBB, Instruction *StartInst, - SmallPtrSet<Instruction *, 4> &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> &Visited, - ProvenanceAnalysis &PA) { - BasicBlock::iterator StartPos = StartInst; - - SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist; - Worklist.push_back(std::make_pair(StartBB, StartPos)); - do { - std::pair<BasicBlock *, BasicBlock::iterator> Pair = - Worklist.pop_back_val(); - BasicBlock *LocalStartBB = Pair.first; - BasicBlock::iterator LocalStartPos = Pair.second; - BasicBlock::iterator StartBBBegin = LocalStartBB->begin(); - for (;;) { - if (LocalStartPos == StartBBBegin) { - pred_iterator PI(LocalStartBB), PE(LocalStartBB, false); - if (PI == PE) - // If we've reached the function entry, produce a null dependence. - DependingInstructions.insert(0); - else - // Add the predecessors to the worklist. - do { - BasicBlock *PredBB = *PI; - if (Visited.insert(PredBB)) - Worklist.push_back(std::make_pair(PredBB, PredBB->end())); - } while (++PI != PE); - break; - } - - Instruction *Inst = --LocalStartPos; - if (Depends(Flavor, Inst, Arg, PA)) { - DependingInstructions.insert(Inst); - break; - } - } - } while (!Worklist.empty()); - - // Determine whether the original StartBB post-dominates all of the blocks we - // visited. If not, insert a sentinal indicating that most optimizations are - // not safe. - for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(), - E = Visited.end(); I != E; ++I) { - const BasicBlock *BB = *I; - if (BB == StartBB) - continue; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) { - const BasicBlock *Succ = *SI; - if (Succ != StartBB && !Visited.count(Succ)) { - DependingInstructions.insert(reinterpret_cast<Instruction *>(-1)); - return; - } - } - } -} - -static bool isNullOrUndef(const Value *V) { - return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); -} - -static bool isNoopInstruction(const Instruction *I) { - return isa<BitCastInst>(I) || - (isa<GetElementPtrInst>(I) && - cast<GetElementPtrInst>(I)->hasAllZeroIndices()); -} - -/// OptimizeRetainCall - Turn objc_retain into -/// objc_retainAutoreleasedReturnValue if the operand is a return value. -void -ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) { - ImmutableCallSite CS(GetObjCArg(Retain)); - const Instruction *Call = CS.getInstruction(); - if (!Call) return; - if (Call->getParent() != Retain->getParent()) return; - - // Check that the call is next to the retain. - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I != Retain) - return; - - // Turn it to an objc_retainAutoreleasedReturnValue.. - Changed = true; - ++NumPeeps; - cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent())); -} - -/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into -/// objc_retain if the operand is not a return value. Or, if it can be paired -/// with an objc_autoreleaseReturnValue, delete the pair and return true. -bool -ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { - // Check for the argument being from an immediately preceding call or invoke. 
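The dependency search above (FindDependencies) is a plain backward worklist walk over the CFG: scan instructions in reverse from the start point, and once the top of a block is reached, queue each unvisited predecessor. A minimal standalone sketch of the same pattern, using only the standard library and hypothetical names rather than the LLVM API (C++17):

// Sketch of a backward worklist walk: collect the first "depending"
// instruction on each path leading to the start point.
#include <set>
#include <utility>
#include <vector>

struct Block {
  std::vector<int> Insts;       // toy "instructions"
  std::vector<Block *> Preds;   // predecessor blocks
};

template <typename PredT>
void findDependencies(Block *StartBB, int StartIdx, PredT Pred,
                      std::set<std::pair<Block *, int>> &Depending) {
  std::set<Block *> Visited;
  std::vector<std::pair<Block *, int>> Worklist{{StartBB, StartIdx}};
  while (!Worklist.empty()) {
    auto [BB, Idx] = Worklist.back();
    Worklist.pop_back();
    bool Found = false;
    for (int I = Idx - 1; I >= 0; --I)
      if (Pred(BB->Insts[I])) {       // first dependence on this path
        Depending.insert({BB, I});
        Found = true;
        break;
      }
    if (!Found)
      for (Block *P : BB->Preds)      // keep walking up the CFG
        if (Visited.insert(P).second)
          Worklist.push_back({P, (int)P->Insts.size()});
  }
}

The real routine additionally records a null sentinel when the walk reaches the function entry and a poison sentinel when the start block does not post-dominate every visited block.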
- const Value *Arg = GetObjCArg(RetainRV); - ImmutableCallSite CS(Arg); - if (const Instruction *Call = CS.getInstruction()) { - if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::const_iterator I = Call; - ++I; - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } else if (const InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock *RetainRVParent = RetainRV->getParent(); - if (II->getNormalDest() == RetainRVParent) { - BasicBlock::const_iterator I = RetainRVParent->begin(); - while (isNoopInstruction(I)) ++I; - if (&*I == RetainRV) - return false; - } - } - } - - // Check for being preceded by an objc_autoreleaseReturnValue on the same - // pointer. In this case, we can delete the pair. - BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); - if (I != Begin) { - do --I; while (I != Begin && isNoopInstruction(I)); - if (GetBasicInstructionClass(I) == IC_AutoreleaseRV && - GetObjCArg(I) == Arg) { - Changed = true; - ++NumPeeps; - EraseInstruction(I); - EraseInstruction(RetainRV); - return true; - } - } - - // Turn it to a plain objc_retain. - Changed = true; - ++NumPeeps; - cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent())); - return false; -} - -/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into -/// objc_autorelease if the result is not used as a return value. -void -ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) { - // Check for a return of the pointer value. - const Value *Ptr = GetObjCArg(AutoreleaseRV); - SmallVector<const Value *, 2> Users; - Users.push_back(Ptr); - do { - Ptr = Users.pop_back_val(); - for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end(); - UI != UE; ++UI) { - const User *I = *UI; - if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV) - return; - if (isa<BitCastInst>(I)) - Users.push_back(I); - } - } while (!Users.empty()); - - Changed = true; - ++NumPeeps; - cast<CallInst>(AutoreleaseRV)-> - setCalledFunction(getAutoreleaseCallee(F.getParent())); -} - -/// OptimizeIndividualCalls - Visit each call, one at a time, and make -/// simplifications without doing any additional analysis. -void ObjCARCOpt::OptimizeIndividualCalls(Function &F) { - // Reset all the flags in preparation for recomputing them. - UsedInThisFunction = 0; - - // Visit all objc_* calls in F. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - InstructionClass Class = GetBasicInstructionClass(Inst); - - switch (Class) { - default: break; - - // Delete no-op casts. These function calls have special semantics, but - // the semantics are entirely implemented via lowering in the front-end, - // so by the time they reach the optimizer, they are just no-op calls - // which return their argument. - // - // There are gray areas here, as the ability to cast reference-counted - // pointers to raw void* and back allows code to break ARC assumptions, - // however these are currently considered to be unimportant. - case IC_NoopCast: - Changed = true; - ++NumNoops; - EraseInstruction(Inst); - continue; - - // If the pointer-to-weak-pointer is null, it's undefined behavior. 
- case IC_StoreWeak: - case IC_LoadWeak: - case IC_LoadWeakRetained: - case IC_InitWeak: - case IC_DestroyWeak: { - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - CI->replaceAllUsesWith(UndefValue::get(CI->getType())); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_CopyWeak: - case IC_MoveWeak: { - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(0)) || - isNullOrUndef(CI->getArgOperand(1))) { - Changed = true; - Type *Ty = CI->getArgOperand(0)->getType(); - new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()), - Constant::getNullValue(Ty), - CI); - CI->replaceAllUsesWith(UndefValue::get(CI->getType())); - CI->eraseFromParent(); - continue; - } - break; - } - case IC_Retain: - OptimizeRetainCall(F, Inst); - break; - case IC_RetainRV: - if (OptimizeRetainRVCall(F, Inst)) - continue; - break; - case IC_AutoreleaseRV: - OptimizeAutoreleaseRVCall(F, Inst); - break; - } - - // objc_autorelease(x) -> objc_release(x) if x is otherwise unused. - if (IsAutorelease(Class) && Inst->use_empty()) { - CallInst *Call = cast<CallInst>(Inst); - const Value *Arg = Call->getArgOperand(0); - Arg = FindSingleUseIdentifiedObject(Arg); - if (Arg) { - Changed = true; - ++NumAutoreleases; - - // Create the declaration lazily. - LLVMContext &C = Inst->getContext(); - CallInst *NewCall = - CallInst::Create(getReleaseCallee(F.getParent()), - Call->getArgOperand(0), "", Call); - NewCall->setMetadata(ImpreciseReleaseMDKind, - MDNode::get(C, ArrayRef<Value *>())); - EraseInstruction(Call); - Inst = NewCall; - Class = IC_Release; - } - } - - // For functions which can never be passed stack arguments, add - // a tail keyword. - if (IsAlwaysTail(Class)) { - Changed = true; - cast<CallInst>(Inst)->setTailCall(); - } - - // Set nounwind as needed. - if (IsNoThrow(Class)) { - Changed = true; - cast<CallInst>(Inst)->setDoesNotThrow(); - } - - if (!IsNoopOnNull(Class)) { - UsedInThisFunction |= 1 << Class; - continue; - } - - const Value *Arg = GetObjCArg(Inst); - - // ARC calls with null are no-ops. Delete them. - if (isNullOrUndef(Arg)) { - Changed = true; - ++NumNoops; - EraseInstruction(Inst); - continue; - } - - // Keep track of which of retain, release, autorelease, and retain_block - // are actually present in this function. - UsedInThisFunction |= 1 << Class; - - // If Arg is a PHI, and one or more incoming values to the - // PHI are null, and the call is control-equivalent to the PHI, and there - // are no relevant side effects between the PHI and the call, the call - // could be pushed up to just those paths with non-null incoming values. - // For now, don't bother splitting critical edges for this. - SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist; - Worklist.push_back(std::make_pair(Inst, Arg)); - do { - std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val(); - Inst = Pair.first; - Arg = Pair.second; - - const PHINode *PN = dyn_cast<PHINode>(Arg); - if (!PN) continue; - - // Determine if the PHI has any null operands, or any incoming - // critical edges. 
- bool HasNull = false; - bool HasCriticalEdges = false; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (isNullOrUndef(Incoming)) - HasNull = true; - else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back()) - .getNumSuccessors() != 1) { - HasCriticalEdges = true; - break; - } - } - // If we have null operands and no critical edges, optimize. - if (!HasCriticalEdges && HasNull) { - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - - // Check that there is nothing that cares about the reference - // count between the call and the phi. - switch (Class) { - case IC_Retain: - case IC_RetainBlock: - // These can always be moved up. - break; - case IC_Release: - // These can't be moved across things that care about the retain - // count. - FindDependencies(NeedsPositiveRetainCount, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_Autorelease: - // These can't be moved across autorelease pool scope boundaries. - FindDependencies(AutoreleasePoolBoundary, Arg, - Inst->getParent(), Inst, - DependingInstructions, Visited, PA); - break; - case IC_RetainRV: - case IC_AutoreleaseRV: - // Don't move these; the RV optimization depends on the autoreleaseRV - // being tail called, and the retainRV being immediately after a call - // (which might still happen if we get lucky with codegen layout, but - // it's not worth taking the chance). - continue; - default: - llvm_unreachable("Invalid dependence flavor"); - } - - if (DependingInstructions.size() == 1 && - *DependingInstructions.begin() == PN) { - Changed = true; - ++NumPartialNoops; - // Clone the call into each predecessor that has a non-null value. - CallInst *CInst = cast<CallInst>(Inst); - Type *ParamTy = CInst->getArgOperand(0)->getType(); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *Incoming = - StripPointerCastsAndObjCCalls(PN->getIncomingValue(i)); - if (!isNullOrUndef(Incoming)) { - CallInst *Clone = cast<CallInst>(CInst->clone()); - Value *Op = PN->getIncomingValue(i); - Instruction *InsertPos = &PN->getIncomingBlock(i)->back(); - if (Op->getType() != ParamTy) - Op = new BitCastInst(Op, ParamTy, "", InsertPos); - Clone->setArgOperand(0, Op); - Clone->insertBefore(InsertPos); - Worklist.push_back(std::make_pair(Clone, Incoming)); - } - } - // Erase the original call. - EraseInstruction(CInst); - continue; - } - } - } while (!Worklist.empty()); - } -} - -/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible -/// control flow, or other CFG structures where moving code across the edge -/// would result in it being executed more. -void -ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - BBState &MyStates) const { - // If any top-down local-use or possible-dec has a succ which is earlier in - // the sequence, forget it. 
- for (BBState::ptr_iterator I = MyStates.top_down_ptr_begin(), - E = MyStates.top_down_ptr_end(); I != E; ++I) - switch (I->second.GetSeq()) { - default: break; - case S_Use: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: - case S_CanRelease: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_Use: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. - if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } - case S_CanRelease: { - const Value *Arg = I->first; - const TerminatorInst *TI = cast<TerminatorInst>(&BB->back()); - bool SomeSuccHasSame = false; - bool AllSuccsHaveSame = true; - PtrState &S = I->second; - succ_const_iterator SI(TI), SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - for (; SI != SE; ++SI) { - Sequence SuccSSeq = S_None; - bool SuccSRRIKnownSafe = false; - // If VisitBottomUp has pointer information for this successor, take - // what we know about it. - DenseMap<const BasicBlock *, BBState>::iterator BBI = - BBStates.find(*SI); - assert(BBI != BBStates.end()); - const PtrState &SuccS = BBI->second.getPtrBottomUpState(Arg); - SuccSSeq = SuccS.GetSeq(); - SuccSRRIKnownSafe = SuccS.RRI.KnownSafe; - switch (SuccSSeq) { - case S_None: { - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) { - S.ClearSequenceProgress(); - break; - } - continue; - } - case S_CanRelease: - SomeSuccHasSame = true; - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - if (!S.RRI.KnownSafe && !SuccSRRIKnownSafe) - AllSuccsHaveSame = false; - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - // If the state at the other end of any of the successor edges - // matches the current state, require all edges to match. This - // guards against loops in the middle of a sequence. 
- if (SomeSuccHasSame && !AllSuccsHaveSame) - S.ClearSequenceProgress(); - break; - } - } -} - -bool -ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst, - BasicBlock *BB, - MapVector<Value *, RRInfo> &Retains, - BBState &MyStates) { - bool NestingDetected = false; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - - // If we see two releases in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second release, which may allow us to - // eliminate the first release too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease) - NestingDetected = true; - - MDNode *ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.ResetSequenceProgress(ReleaseMetadata ? S_MovableRelease : S_Release); - S.RRI.ReleaseMetadata = ReleaseMetadata; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - S.RRI.Calls.insert(Inst); - - S.SetKnownPositiveRefCount(); - break; - } - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrBottomUpState(Arg); - S.SetKnownPositiveRefCount(); - - switch (S.GetSeq()) { - case S_Stop: - case S_Release: - case S_MovableRelease: - case S_Use: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_CanRelease: - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. - if (Class != IC_RetainRV) { - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - Retains[Inst] = S.RRI; - } - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - return NestingDetected; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearBottomUpPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(), - ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Use: - S.SetSeq(S_CanRelease); - continue; - case S_CanRelease: - case S_Release: - case S_MovableRelease: - case S_Stop: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - // Check for possible direct uses. 
- switch (Seq) { - case S_Release: - case S_MovableRelease: - if (CanUse(Inst, Ptr, PA, Class)) { - assert(S.RRI.ReverseInsertPts.empty()); - // If this is an invoke instruction, we're scanning it as part of - // one of its successor blocks, since we can't insert code after it - // in its own block, and we don't want to split critical edges. - if (isa<InvokeInst>(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - S.SetSeq(S_Use); - } else if (Seq == S_Release && - (Class == IC_User || Class == IC_CallOrUser)) { - // Non-movable releases depend on any possible objc pointer use. - S.SetSeq(S_Stop); - assert(S.RRI.ReverseInsertPts.empty()); - // As above; handle invoke specially. - if (isa<InvokeInst>(Inst)) - S.RRI.ReverseInsertPts.insert(BB->getFirstInsertionPt()); - else - S.RRI.ReverseInsertPts.insert(llvm::next(BasicBlock::iterator(Inst))); - } - break; - case S_Stop: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_CanRelease: - case S_Use: - case S_None: - break; - case S_Retain: - llvm_unreachable("bottom-up pointer in retain state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitBottomUp(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each successor to compute the initial state - // for the current block. - BBState::edge_iterator SI(MyStates.succ_begin()), - SE(MyStates.succ_end()); - if (SI != SE) { - const BasicBlock *Succ = *SI; - DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ); - assert(I != BBStates.end()); - MyStates.InitFromSucc(I->second); - ++SI; - for (; SI != SE; ++SI) { - Succ = *SI; - I = BBStates.find(Succ); - assert(I != BBStates.end()); - MyStates.MergeSucc(I->second); - } - } - - // Visit all the instructions, bottom-up. - for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { - Instruction *Inst = llvm::prior(I); - - // Invoke instructions are visited as part of their successors (below). - if (isa<InvokeInst>(Inst)) - continue; - - NestingDetected |= VisitInstructionBottomUp(Inst, BB, Retains, MyStates); - } - - // If there's a predecessor with an invoke, visit the invoke as if it were - // part of this block, since we can't insert code after an invoke in its own - // block, and we don't want to split critical edges. - for (BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); PI != PE; ++PI) { - BasicBlock *Pred = *PI; - if (InvokeInst *II = dyn_cast<InvokeInst>(&Pred->back())) - NestingDetected |= VisitInstructionBottomUp(II, BB, Retains, MyStates); - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, - DenseMap<Value *, RRInfo> &Releases, - BBState &MyStates) { - bool NestingDetected = false; - InstructionClass Class = GetInstructionClass(Inst); - const Value *Arg = 0; - - switch (Class) { - case IC_RetainBlock: - // An objc_retainBlock call with just a use may need to be kept, - // because it may be copying a block from the stack to the heap. - if (!IsRetainBlockOptimizable(Inst)) - break; - // FALLTHROUGH - case IC_Retain: - case IC_RetainRV: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - - // Don't do retain+release tracking for IC_RetainRV, because it's - // better to let it remain as the first instruction after a call. 
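The bottom-up visitor tracks, per pointer, how far a release-to-retain sequence has progressed: a release starts a sequence, uses and potential reference-count decrements advance it, and a matching retain closes it as a removal candidate. A simplified, std-only sketch of that state machine (hypothetical names; the real pass distinguishes more states such as S_Stop and S_MovableRelease and records insertion points along the way):

// Minimal per-pointer sequence tracker for the bottom-up walk.
#include <cassert>

enum Sequence { S_None, S_Release, S_Use, S_CanRelease, S_Complete };

struct PtrState {
  Sequence Seq = S_None;
  void visitRelease()     { Seq = S_Release; }
  void visitUse()         { if (Seq == S_Release) Seq = S_Use; }
  void visitPossibleDec() { if (Seq == S_Use) Seq = S_CanRelease; }
  void visitRetain()      { if (Seq != S_None) Seq = S_Complete; }
};

int main() {
  PtrState S;
  S.visitRelease();   // saw objc_release(x) while walking bottom-up
  S.visitUse();       // something may read x
  S.visitRetain();    // matching objc_retain(x): candidate pair
  assert(S.Seq == S_Complete);
  return 0;
}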
- if (Class != IC_RetainRV) { - // If we see two retains in a row on the same pointer. If so, make - // a note, and we'll cicle back to revisit it after we've - // hopefully eliminated the second retain, which may allow us to - // eliminate the first retain too. - // Theoretically we could implement removal of nested retain+release - // pairs by making PtrState hold a stack of states, but this is - // simple and avoids adding overhead for the non-nested case. - if (S.GetSeq() == S_Retain) - NestingDetected = true; - - S.ResetSequenceProgress(S_Retain); - S.RRI.IsRetainBlock = Class == IC_RetainBlock; - S.RRI.KnownSafe = S.IsKnownIncremented(); - S.RRI.Calls.insert(Inst); - } - - S.SetKnownPositiveRefCount(); - - // A retain can be a potential use; procede to the generic checking - // code below. - break; - } - case IC_Release: { - Arg = GetObjCArg(Inst); - - PtrState &S = MyStates.getPtrTopDownState(Arg); - S.ClearRefCount(); - - switch (S.GetSeq()) { - case S_Retain: - case S_CanRelease: - S.RRI.ReverseInsertPts.clear(); - // FALL THROUGH - case S_Use: - S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind); - S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall(); - Releases[Inst] = S.RRI; - S.ClearSequenceProgress(); - break; - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - break; - } - case IC_AutoreleasepoolPop: - // Conservatively, clear MyStates for all known pointers. - MyStates.clearTopDownPointers(); - return NestingDetected; - case IC_AutoreleasepoolPush: - case IC_None: - // These are irrelevant. - return NestingDetected; - default: - break; - } - - // Consider any other possible effects of this instruction on each - // pointer being tracked. - for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(), - ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) { - const Value *Ptr = MI->first; - if (Ptr == Arg) - continue; // Handled above. - PtrState &S = MI->second; - Sequence Seq = S.GetSeq(); - - // Check for possible releases. - if (CanAlterRefCount(Inst, Ptr, PA, Class)) { - S.ClearRefCount(); - switch (Seq) { - case S_Retain: - S.SetSeq(S_CanRelease); - assert(S.RRI.ReverseInsertPts.empty()); - S.RRI.ReverseInsertPts.insert(Inst); - - // One call can't cause a transition from S_Retain to S_CanRelease - // and S_CanRelease to S_Use. If we've made the first transition, - // we're done. - continue; - case S_Use: - case S_CanRelease: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - // Check for possible direct uses. - switch (Seq) { - case S_CanRelease: - if (CanUse(Inst, Ptr, PA, Class)) - S.SetSeq(S_Use); - break; - case S_Retain: - case S_Use: - case S_None: - break; - case S_Stop: - case S_Release: - case S_MovableRelease: - llvm_unreachable("top-down pointer in release state!"); - } - } - - return NestingDetected; -} - -bool -ObjCARCOpt::VisitTopDown(BasicBlock *BB, - DenseMap<const BasicBlock *, BBState> &BBStates, - DenseMap<Value *, RRInfo> &Releases) { - bool NestingDetected = false; - BBState &MyStates = BBStates[BB]; - - // Merge the states from each predecessor to compute the initial state - // for the current block. 
- BBState::edge_iterator PI(MyStates.pred_begin()), - PE(MyStates.pred_end()); - if (PI != PE) { - const BasicBlock *Pred = *PI; - DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.InitFromPred(I->second); - ++PI; - for (; PI != PE; ++PI) { - Pred = *PI; - I = BBStates.find(Pred); - assert(I != BBStates.end()); - MyStates.MergePred(I->second); - } - } - - // Visit all the instructions, top-down. - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *Inst = I; - NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); - } - - CheckForCFGHazards(BB, BBStates, MyStates); - return NestingDetected; -} - -static void -ComputePostOrders(Function &F, - SmallVectorImpl<BasicBlock *> &PostOrder, - SmallVectorImpl<BasicBlock *> &ReverseCFGPostOrder, - unsigned NoObjCARCExceptionsMDKind, - DenseMap<const BasicBlock *, BBState> &BBStates) { - /// Visited - The visited set, for doing DFS walks. - SmallPtrSet<BasicBlock *, 16> Visited; - - // Do DFS, computing the PostOrder. - SmallPtrSet<BasicBlock *, 16> OnStack; - SmallVector<std::pair<BasicBlock *, succ_iterator>, 16> SuccStack; - - // Functions always have exactly one entry block, and we don't have - // any other block that we treat like an entry block. - BasicBlock *EntryBB = &F.getEntryBlock(); - BBState &MyStates = BBStates[EntryBB]; - MyStates.SetAsEntry(); - TerminatorInst *EntryTI = cast<TerminatorInst>(&EntryBB->back()); - SuccStack.push_back(std::make_pair(EntryBB, succ_iterator(EntryTI))); - Visited.insert(EntryBB); - OnStack.insert(EntryBB); - do { - dfs_next_succ: - BasicBlock *CurrBB = SuccStack.back().first; - TerminatorInst *TI = cast<TerminatorInst>(&CurrBB->back()); - succ_iterator SE(TI, false); - - // If the terminator is an invoke marked with the - // clang.arc.no_objc_arc_exceptions metadata, the unwind edge can be - // ignored, for ARC purposes. - if (isa<InvokeInst>(TI) && TI->getMetadata(NoObjCARCExceptionsMDKind)) - --SE; - - while (SuccStack.back().second != SE) { - BasicBlock *SuccBB = *SuccStack.back().second++; - if (Visited.insert(SuccBB)) { - TerminatorInst *TI = cast<TerminatorInst>(&SuccBB->back()); - SuccStack.push_back(std::make_pair(SuccBB, succ_iterator(TI))); - BBStates[CurrBB].addSucc(SuccBB); - BBState &SuccStates = BBStates[SuccBB]; - SuccStates.addPred(CurrBB); - OnStack.insert(SuccBB); - goto dfs_next_succ; - } - - if (!OnStack.count(SuccBB)) { - BBStates[CurrBB].addSucc(SuccBB); - BBStates[SuccBB].addPred(CurrBB); - } - } - OnStack.erase(CurrBB); - PostOrder.push_back(CurrBB); - SuccStack.pop_back(); - } while (!SuccStack.empty()); - - Visited.clear(); - - // Do reverse-CFG DFS, computing the reverse-CFG PostOrder. - // Functions may have many exits, and there also blocks which we treat - // as exits due to ignored edges. 
- SmallVector<std::pair<BasicBlock *, BBState::edge_iterator>, 16> PredStack; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *ExitBB = I; - BBState &MyStates = BBStates[ExitBB]; - if (!MyStates.isExit()) - continue; - - MyStates.SetAsExit(); - - PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); - Visited.insert(ExitBB); - while (!PredStack.empty()) { - reverse_dfs_next_succ: - BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); - while (PredStack.back().second != PE) { - BasicBlock *BB = *PredStack.back().second++; - if (Visited.insert(BB)) { - PredStack.push_back(std::make_pair(BB, BBStates[BB].pred_begin())); - goto reverse_dfs_next_succ; - } - } - ReverseCFGPostOrder.push_back(PredStack.pop_back_val().first); - } - } -} - -// Visit - Visit the function both top-down and bottom-up. -bool -ObjCARCOpt::Visit(Function &F, - DenseMap<const BasicBlock *, BBState> &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases) { - - // Use reverse-postorder traversals, because we magically know that loops - // will be well behaved, i.e. they won't repeatedly call retain on a single - // pointer without doing a release. We can't use the ReversePostOrderTraversal - // class here because we want the reverse-CFG postorder to consider each - // function exit point, and we want to ignore selected cycle edges. - SmallVector<BasicBlock *, 16> PostOrder; - SmallVector<BasicBlock *, 16> ReverseCFGPostOrder; - ComputePostOrders(F, PostOrder, ReverseCFGPostOrder, - NoObjCARCExceptionsMDKind, - BBStates); - - // Use reverse-postorder on the reverse CFG for bottom-up. - bool BottomUpNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - ReverseCFGPostOrder.rbegin(), E = ReverseCFGPostOrder.rend(); - I != E; ++I) - BottomUpNestingDetected |= VisitBottomUp(*I, BBStates, Retains); - - // Use reverse-postorder for top-down. - bool TopDownNestingDetected = false; - for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator I = - PostOrder.rbegin(), E = PostOrder.rend(); - I != E; ++I) - TopDownNestingDetected |= VisitTopDown(*I, BBStates, Releases); - - return TopDownNestingDetected && BottomUpNestingDetected; -} - -/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove. -void ObjCARCOpt::MoveCalls(Value *Arg, - RRInfo &RetainsToMove, - RRInfo &ReleasesToMove, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - SmallVectorImpl<Instruction *> &DeadInsts, - Module *M) { - Type *ArgTy = Arg->getType(); - Type *ParamTy = PointerType::getUnqual(Type::getInt8Ty(ArgTy->getContext())); - - // Insert the new retain and release calls. - for (SmallPtrSet<Instruction *, 2>::const_iterator - PI = ReleasesToMove.ReverseInsertPts.begin(), - PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = - CallInst::Create(RetainsToMove.IsRetainBlock ? 
- getRetainBlockCallee(M) : getRetainCallee(M), - MyArg, "", InsertPt); - Call->setDoesNotThrow(); - if (RetainsToMove.IsRetainBlock) - Call->setMetadata(CopyOnEscapeMDKind, - MDNode::get(M->getContext(), ArrayRef<Value *>())); - else - Call->setTailCall(); - } - for (SmallPtrSet<Instruction *, 2>::const_iterator - PI = RetainsToMove.ReverseInsertPts.begin(), - PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) { - Instruction *InsertPt = *PI; - Value *MyArg = ArgTy == ParamTy ? Arg : - new BitCastInst(Arg, ParamTy, "", InsertPt); - CallInst *Call = CallInst::Create(getReleaseCallee(M), MyArg, - "", InsertPt); - // Attach a clang.imprecise_release metadata tag, if appropriate. - if (MDNode *M = ReleasesToMove.ReleaseMetadata) - Call->setMetadata(ImpreciseReleaseMDKind, M); - Call->setDoesNotThrow(); - if (ReleasesToMove.IsTailCallRelease) - Call->setTailCall(); - } - - // Delete the original retain and release calls. - for (SmallPtrSet<Instruction *, 2>::const_iterator - AI = RetainsToMove.Calls.begin(), - AE = RetainsToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRetain = *AI; - Retains.blot(OrigRetain); - DeadInsts.push_back(OrigRetain); - } - for (SmallPtrSet<Instruction *, 2>::const_iterator - AI = ReleasesToMove.Calls.begin(), - AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) { - Instruction *OrigRelease = *AI; - Releases.erase(OrigRelease); - DeadInsts.push_back(OrigRelease); - } -} - -/// PerformCodePlacement - Identify pairings between the retains and releases, -/// and delete and/or move them. -bool -ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState> - &BBStates, - MapVector<Value *, RRInfo> &Retains, - DenseMap<Value *, RRInfo> &Releases, - Module *M) { - bool AnyPairsCompletelyEliminated = false; - RRInfo RetainsToMove; - RRInfo ReleasesToMove; - SmallVector<Instruction *, 4> NewRetains; - SmallVector<Instruction *, 4> NewReleases; - SmallVector<Instruction *, 8> DeadInsts; - - // Visit each retain. - for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(), - E = Retains.end(); I != E; ++I) { - Value *V = I->first; - if (!V) continue; // blotted - - Instruction *Retain = cast<Instruction>(V); - Value *Arg = GetObjCArg(Retain); - - // If the object being released is in static or stack storage, we know it's - // not being managed by ObjC reference counting, so we can delete pairs - // regardless of what possible decrements or uses lie between them. - bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg); - - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (const LoadInst *LI = dyn_cast<LoadInst>(Arg)) - if (const GlobalVariable *GV = - dyn_cast<GlobalVariable>( - StripPointerCastsAndObjCCalls(LI->getPointerOperand()))) - if (GV->isConstant()) - KnownSafe = true; - - // If a pair happens in a region where it is known that the reference count - // is already incremented, we can similarly ignore possible decrements. - bool KnownSafeTD = true, KnownSafeBU = true; - - // Connect the dots between the top-down-collected RetainsToMove and - // bottom-up-collected ReleasesToMove to form sets of related calls. - // This is an iterative process so that we connect multiple releases - // to multiple retains if needed. 
- unsigned OldDelta = 0; - unsigned NewDelta = 0; - unsigned OldCount = 0; - unsigned NewCount = 0; - bool FirstRelease = true; - bool FirstRetain = true; - NewRetains.push_back(Retain); - for (;;) { - for (SmallVectorImpl<Instruction *>::const_iterator - NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) { - Instruction *NewRetain = *NI; - MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain); - assert(It != Retains.end()); - const RRInfo &NewRetainRRI = It->second; - KnownSafeTD &= NewRetainRRI.KnownSafe; - for (SmallPtrSet<Instruction *, 2>::const_iterator - LI = NewRetainRRI.Calls.begin(), - LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewRetainRelease = *LI; - DenseMap<Value *, RRInfo>::const_iterator Jt = - Releases.find(NewRetainRelease); - if (Jt == Releases.end()) - goto next_retain; - const RRInfo &NewRetainReleaseRRI = Jt->second; - assert(NewRetainReleaseRRI.Calls.count(NewRetain)); - if (ReleasesToMove.Calls.insert(NewRetainRelease)) { - OldDelta -= - BBStates[NewRetainRelease->getParent()].GetAllPathCount(); - - // Merge the ReleaseMetadata and IsTailCallRelease values. - if (FirstRelease) { - ReleasesToMove.ReleaseMetadata = - NewRetainReleaseRRI.ReleaseMetadata; - ReleasesToMove.IsTailCallRelease = - NewRetainReleaseRRI.IsTailCallRelease; - FirstRelease = false; - } else { - if (ReleasesToMove.ReleaseMetadata != - NewRetainReleaseRRI.ReleaseMetadata) - ReleasesToMove.ReleaseMetadata = 0; - if (ReleasesToMove.IsTailCallRelease != - NewRetainReleaseRRI.IsTailCallRelease) - ReleasesToMove.IsTailCallRelease = false; - } - - // Collect the optimal insertion points. - if (!KnownSafe) - for (SmallPtrSet<Instruction *, 2>::const_iterator - RI = NewRetainReleaseRRI.ReverseInsertPts.begin(), - RE = NewRetainReleaseRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (ReleasesToMove.ReverseInsertPts.insert(RIP)) - NewDelta -= BBStates[RIP->getParent()].GetAllPathCount(); - } - NewReleases.push_back(NewRetainRelease); - } - } - } - NewRetains.clear(); - if (NewReleases.empty()) break; - - // Back the other way. - for (SmallVectorImpl<Instruction *>::const_iterator - NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) { - Instruction *NewRelease = *NI; - DenseMap<Value *, RRInfo>::const_iterator It = - Releases.find(NewRelease); - assert(It != Releases.end()); - const RRInfo &NewReleaseRRI = It->second; - KnownSafeBU &= NewReleaseRRI.KnownSafe; - for (SmallPtrSet<Instruction *, 2>::const_iterator - LI = NewReleaseRRI.Calls.begin(), - LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) { - Instruction *NewReleaseRetain = *LI; - MapVector<Value *, RRInfo>::const_iterator Jt = - Retains.find(NewReleaseRetain); - if (Jt == Retains.end()) - goto next_retain; - const RRInfo &NewReleaseRetainRRI = Jt->second; - assert(NewReleaseRetainRRI.Calls.count(NewRelease)); - if (RetainsToMove.Calls.insert(NewReleaseRetain)) { - unsigned PathCount = - BBStates[NewReleaseRetain->getParent()].GetAllPathCount(); - OldDelta += PathCount; - OldCount += PathCount; - - // Merge the IsRetainBlock values. - if (FirstRetain) { - RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock; - FirstRetain = false; - } else if (ReleasesToMove.IsRetainBlock != - NewReleaseRetainRRI.IsRetainBlock) - // It's not possible to merge the sequences if one uses - // objc_retain and the other uses objc_retainBlock. - goto next_retain; - - // Collect the optimal insertion points. 
- if (!KnownSafe) - for (SmallPtrSet<Instruction *, 2>::const_iterator - RI = NewReleaseRetainRRI.ReverseInsertPts.begin(), - RE = NewReleaseRetainRRI.ReverseInsertPts.end(); - RI != RE; ++RI) { - Instruction *RIP = *RI; - if (RetainsToMove.ReverseInsertPts.insert(RIP)) { - PathCount = BBStates[RIP->getParent()].GetAllPathCount(); - NewDelta += PathCount; - NewCount += PathCount; - } - } - NewRetains.push_back(NewReleaseRetain); - } - } - } - NewReleases.clear(); - if (NewRetains.empty()) break; - } - - // If the pointer is known incremented or nested, we can safely delete the - // pair regardless of what's between them. - if (KnownSafeTD || KnownSafeBU) { - RetainsToMove.ReverseInsertPts.clear(); - ReleasesToMove.ReverseInsertPts.clear(); - NewCount = 0; - } else { - // Determine whether the new insertion points we computed preserve the - // balance of retain and release calls through the program. - // TODO: If the fully aggressive solution isn't valid, try to find a - // less aggressive solution which is. - if (NewDelta != 0) - goto next_retain; - } - - // Determine whether the original call points are balanced in the retain and - // release calls through the program. If not, conservatively don't touch - // them. - // TODO: It's theoretically possible to do code motion in this case, as - // long as the existing imbalances are maintained. - if (OldDelta != 0) - goto next_retain; - - // Ok, everything checks out and we're all set. Let's move some code! - Changed = true; - assert(OldCount != 0 && "Unreachable code?"); - AnyPairsCompletelyEliminated = NewCount == 0; - NumRRs += OldCount - NewCount; - MoveCalls(Arg, RetainsToMove, ReleasesToMove, - Retains, Releases, DeadInsts, M); - - next_retain: - NewReleases.clear(); - NewRetains.clear(); - RetainsToMove.clear(); - ReleasesToMove.clear(); - } - - // Now that we're done moving everything, we can delete the newly dead - // instructions, as we no longer need them as insert points. - while (!DeadInsts.empty()) - EraseInstruction(DeadInsts.pop_back_val()); - - return AnyPairsCompletelyEliminated; -} - -/// OptimizeWeakCalls - Weak pointer optimizations. -void ObjCARCOpt::OptimizeWeakCalls(Function &F) { - // First, do memdep-style RLE and S2L optimizations. We can't use memdep - // itself because it uses AliasAnalysis and we need to do provenance - // queries instead. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - InstructionClass Class = GetBasicInstructionClass(Inst); - if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained) - continue; - - // Delete objc_loadWeak calls with no users. - if (Class == IC_LoadWeak && Inst->use_empty()) { - Inst->eraseFromParent(); - continue; - } - - // TODO: For now, just look for an earlier available version of this value - // within the same block. Theoretically, we could do memdep-style non-local - // analysis too, but that would want caching. A better approach would be to - // use the technique that EarlyCSE uses. - inst_iterator Current = llvm::prior(I); - BasicBlock *CurrentBB = Current.getBasicBlockIterator(); - for (BasicBlock::iterator B = CurrentBB->begin(), - J = Current.getInstructionIterator(); - J != B; --J) { - Instruction *EarlierInst = &*llvm::prior(J); - InstructionClass EarlierClass = GetInstructionClass(EarlierInst); - switch (EarlierClass) { - case IC_LoadWeak: - case IC_LoadWeakRetained: { - // If this is loading from the same pointer, replace this load's value - // with that one. 
- CallInst *Call = cast<CallInst>(Inst); - CallInst *EarlierCall = cast<CallInst>(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. - Call->replaceAllUsesWith(EarlierCall); - Call->eraseFromParent(); - goto clobbered; - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - goto clobbered; - case AliasAnalysis::NoAlias: - break; - } - break; - } - case IC_StoreWeak: - case IC_InitWeak: { - // If this is storing to the same pointer and has the same size etc. - // replace this load's value with the stored value. - CallInst *Call = cast<CallInst>(Inst); - CallInst *EarlierCall = cast<CallInst>(EarlierInst); - Value *Arg = Call->getArgOperand(0); - Value *EarlierArg = EarlierCall->getArgOperand(0); - switch (PA.getAA()->alias(Arg, EarlierArg)) { - case AliasAnalysis::MustAlias: - Changed = true; - // If the load has a builtin retain, insert a plain retain for it. - if (Class == IC_LoadWeakRetained) { - CallInst *CI = - CallInst::Create(getRetainCallee(F.getParent()), EarlierCall, - "", Call); - CI->setTailCall(); - } - // Zap the fully redundant load. - Call->replaceAllUsesWith(EarlierCall->getArgOperand(1)); - Call->eraseFromParent(); - goto clobbered; - case AliasAnalysis::MayAlias: - case AliasAnalysis::PartialAlias: - goto clobbered; - case AliasAnalysis::NoAlias: - break; - } - break; - } - case IC_MoveWeak: - case IC_CopyWeak: - // TOOD: Grab the copied value. - goto clobbered; - case IC_AutoreleasepoolPush: - case IC_None: - case IC_User: - // Weak pointers are only modified through the weak entry points - // (and arbitrary calls, which could call the weak entry points). - break; - default: - // Anything else could modify the weak pointer. - goto clobbered; - } - } - clobbered:; - } - - // Then, for each destroyWeak with an alloca operand, check to see if - // the alloca and all its users can be zapped. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - InstructionClass Class = GetBasicInstructionClass(Inst); - if (Class != IC_DestroyWeak) - continue; - - CallInst *Call = cast<CallInst>(Inst); - Value *Arg = Call->getArgOperand(0); - if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) { - for (Value::use_iterator UI = Alloca->use_begin(), - UE = Alloca->use_end(); UI != UE; ++UI) { - const Instruction *UserInst = cast<Instruction>(*UI); - switch (GetBasicInstructionClass(UserInst)) { - case IC_InitWeak: - case IC_StoreWeak: - case IC_DestroyWeak: - continue; - default: - goto done; - } - } - Changed = true; - for (Value::use_iterator UI = Alloca->use_begin(), - UE = Alloca->use_end(); UI != UE; ) { - CallInst *UserInst = cast<CallInst>(*UI++); - switch (GetBasicInstructionClass(UserInst)) { - case IC_InitWeak: - case IC_StoreWeak: - // These functions return their second argument. - UserInst->replaceAllUsesWith(UserInst->getArgOperand(1)); - break; - case IC_DestroyWeak: - // No return value. 
- break; - default: - llvm_unreachable("alloca really is used!"); - } - UserInst->eraseFromParent(); - } - Alloca->eraseFromParent(); - done:; - } - } -} - -/// OptimizeSequences - Identify program paths which execute sequences of -/// retains and releases which can be eliminated. -bool ObjCARCOpt::OptimizeSequences(Function &F) { - /// Releases, Retains - These are used to store the results of the main flow - /// analysis. These use Value* as the key instead of Instruction* so that the - /// map stays valid when we get around to rewriting code and calls get - /// replaced by arguments. - DenseMap<Value *, RRInfo> Releases; - MapVector<Value *, RRInfo> Retains; - - /// BBStates, This is used during the traversal of the function to track the - /// states for each identified object at each block. - DenseMap<const BasicBlock *, BBState> BBStates; - - // Analyze the CFG of the function, and all instructions. - bool NestingDetected = Visit(F, BBStates, Retains, Releases); - - // Transform. - return PerformCodePlacement(BBStates, Retains, Releases, F.getParent()) && - NestingDetected; -} - -/// OptimizeReturns - Look for this pattern: -/// \code -/// %call = call i8* @something(...) -/// %2 = call i8* @objc_retain(i8* %call) -/// %3 = call i8* @objc_autorelease(i8* %2) -/// ret i8* %3 -/// \endcode -/// And delete the retain and autorelease. -/// -/// Otherwise if it's just this: -/// \code -/// %3 = call i8* @objc_autorelease(i8* %2) -/// ret i8* %3 -/// \endcode -/// convert the autorelease to autoreleaseRV. -void ObjCARCOpt::OptimizeReturns(Function &F) { - if (!F.getReturnType()->isPointerTy()) - return; - - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - BasicBlock *BB = FI; - ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back()); - if (!Ret) continue; - - const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0)); - FindDependencies(NeedsPositiveRetainCount, Arg, - BB, Ret, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Autorelease = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - if (!Autorelease) - goto next_block; - InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease); - if (!IsAutorelease(AutoreleaseClass)) - goto next_block; - if (GetObjCArg(Autorelease) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Check that there is nothing that can affect the reference - // count between the autorelease and the retain. - FindDependencies(CanChangeRetainCount, Arg, - BB, Autorelease, DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Retain = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that we found a retain with the same argument. - if (!Retain || - !IsRetain(GetBasicInstructionClass(Retain)) || - GetObjCArg(Retain) != Arg) - goto next_block; - - DependingInstructions.clear(); - Visited.clear(); - - // Convert the autorelease to an autoreleaseRV, since it's - // returning the value. - if (AutoreleaseClass == IC_Autorelease) { - Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent())); - AutoreleaseClass = IC_AutoreleaseRV; - } - - // Check that there is nothing that can affect the reference - // count between the retain and the call. - // Note that Retain need not be in BB. 
- FindDependencies(CanChangeRetainCount, Arg, Retain->getParent(), Retain, - DependingInstructions, Visited, PA); - if (DependingInstructions.size() != 1) - goto next_block; - - { - CallInst *Call = - dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - - // Check that the pointer is the return value of the call. - if (!Call || Arg != Call) - goto next_block; - - // Check that the call is a regular call. - InstructionClass Class = GetBasicInstructionClass(Call); - if (Class != IC_CallOrUser && Class != IC_Call) - goto next_block; - - // If so, we can zap the retain and autorelease. - Changed = true; - ++NumRets; - EraseInstruction(Retain); - EraseInstruction(Autorelease); - } - } - } - - next_block: - DependingInstructions.clear(); - Visited.clear(); - } -} - -bool ObjCARCOpt::doInitialization(Module &M) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // Identify the imprecise release metadata kind. - ImpreciseReleaseMDKind = - M.getContext().getMDKindID("clang.imprecise_release"); - CopyOnEscapeMDKind = - M.getContext().getMDKindID("clang.arc.copy_on_escape"); - NoObjCARCExceptionsMDKind = - M.getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); - - // Intuitively, objc_retain and others are nocapture, however in practice - // they are not, because they return their argument value. And objc_release - // calls finalizers which can have arbitrary side effects. - - // These are initialized lazily. - RetainRVCallee = 0; - AutoreleaseRVCallee = 0; - ReleaseCallee = 0; - RetainCallee = 0; - RetainBlockCallee = 0; - AutoreleaseCallee = 0; - - return false; -} - -bool ObjCARCOpt::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - - PA.setAA(&getAnalysis<AliasAnalysis>()); - - // This pass performs several distinct transformations. As a compile-time aid - // when compiling code that isn't ObjC, skip these if the relevant ObjC - // library functions aren't declared. - - // Preliminary optimizations. This also computs UsedInThisFunction. - OptimizeIndividualCalls(F); - - // Optimizations for weak pointers. - if (UsedInThisFunction & ((1 << IC_LoadWeak) | - (1 << IC_LoadWeakRetained) | - (1 << IC_StoreWeak) | - (1 << IC_InitWeak) | - (1 << IC_CopyWeak) | - (1 << IC_MoveWeak) | - (1 << IC_DestroyWeak))) - OptimizeWeakCalls(F); - - // Optimizations for retain+release pairs. - if (UsedInThisFunction & ((1 << IC_Retain) | - (1 << IC_RetainRV) | - (1 << IC_RetainBlock))) - if (UsedInThisFunction & (1 << IC_Release)) - // Run OptimizeSequences until it either stops making changes or - // no retain+release pair nesting is detected. - while (OptimizeSequences(F)) {} - - // Optimizations if objc_autorelease is used. - if (UsedInThisFunction & ((1 << IC_Autorelease) | - (1 << IC_AutoreleaseRV))) - OptimizeReturns(F); - - return Changed; -} - -void ObjCARCOpt::releaseMemory() { - PA.clear(); -} - -//===----------------------------------------------------------------------===// -// ARC contraction. -//===----------------------------------------------------------------------===// - -// TODO: ObjCARCContract could insert PHI nodes when uses aren't -// dominated by single calls. 
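runOnFunction above gates each later phase on a bit mask of the ARC entry points actually seen while visiting individual calls, so functions that never touch weak references or autorelease skip those phases entirely. A self-contained sketch of that gating, with hypothetical names:

// Record which kinds of ARC calls were seen, then skip whole sub-passes
// whose relevant calls never occur in the function.
#include <cstdio>

enum CallKind { CK_Retain, CK_Release, CK_Autorelease, CK_LoadWeak, CK_NumKinds };

struct FunctionSummary {
  unsigned UsedKinds = 0;
  void note(CallKind K) { UsedKinds |= 1u << K; }
  bool any(unsigned Mask) const { return (UsedKinds & Mask) != 0; }
};

int main() {
  FunctionSummary FS;
  FS.note(CK_Retain);
  FS.note(CK_Release);

  if (FS.any(1u << CK_LoadWeak))
    std::puts("run weak-pointer optimizations");
  if (FS.any(1u << CK_Retain) && FS.any(1u << CK_Release))
    std::puts("run retain/release pairing until fixpoint");
  if (FS.any(1u << CK_Autorelease))
    std::puts("run return-value optimizations");
  return 0;
}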
- -#include "llvm/Analysis/Dominators.h" -#include "llvm/InlineAsm.h" -#include "llvm/Operator.h" - -STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed"); - -namespace { - /// ObjCARCContract - Late ARC optimizations. These change the IR in a way - /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late. - class ObjCARCContract : public FunctionPass { - bool Changed; - AliasAnalysis *AA; - DominatorTree *DT; - ProvenanceAnalysis PA; - - /// Run - A flag indicating whether this optimization pass should run. - bool Run; - - /// StoreStrongCallee, etc. - Declarations for ObjC runtime - /// functions, for use in creating calls to them. These are initialized - /// lazily to avoid cluttering up the Module with unused declarations. - Constant *StoreStrongCallee, - *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee; - - /// RetainRVMarker - The inline asm string to insert between calls and - /// RetainRV calls to make the optimization work on targets which need it. - const MDString *RetainRVMarker; - - /// StoreStrongCalls - The set of inserted objc_storeStrong calls. If - /// at the end of walking the function we have found no alloca - /// instructions, these calls can be marked "tail". - SmallPtrSet<CallInst *, 8> StoreStrongCalls; - - Constant *getStoreStrongCallee(Module *M); - Constant *getRetainAutoreleaseCallee(Module *M); - Constant *getRetainAutoreleaseRVCallee(Module *M); - - bool ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet<Instruction *, 4> - &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> - &Visited); - - void ContractRelease(Instruction *Release, - inst_iterator &Iter); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool doInitialization(Module &M); - virtual bool runOnFunction(Function &F); - - public: - static char ID; - ObjCARCContract() : FunctionPass(ID) { - initializeObjCARCContractPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char ObjCARCContract::ID = 0; -INITIALIZE_PASS_BEGIN(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_END(ObjCARCContract, - "objc-arc-contract", "ObjC ARC contraction", false, false) - -Pass *llvm::createObjCARCContractPass() { - return new ObjCARCContract(); -} - -void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<AliasAnalysis>(); - AU.addRequired<DominatorTree>(); - AU.setPreservesCFG(); -} - -Constant *ObjCARCContract::getStoreStrongCallee(Module *M) { - if (!StoreStrongCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - Type *Params[] = { I8XX, I8X }; - - AttributeSet Attributes = AttributeSet() - .addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)) - .addAttr(M->getContext(), 1, Attributes::get(C, Attributes::NoCapture)); - - StoreStrongCallee = - M->getOrInsertFunction( - "objc_storeStrong", - FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false), - Attributes); - } - return StoreStrongCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) { - if (!RetainAutoreleaseCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - 
AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainAutoreleaseCallee = - M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes); - } - return RetainAutoreleaseCallee; -} - -Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) { - if (!RetainAutoreleaseRVCallee) { - LLVMContext &C = M->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *Params[] = { I8X }; - FunctionType *FTy = FunctionType::get(I8X, Params, /*isVarArg=*/false); - AttributeSet Attributes = - AttributeSet().addAttr(M->getContext(), AttributeSet::FunctionIndex, - Attributes::get(C, Attributes::NoUnwind)); - RetainAutoreleaseRVCallee = - M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy, - Attributes); - } - return RetainAutoreleaseRVCallee; -} - -/// ContractAutorelease - Merge an autorelease with a retain into a fused call. -bool -ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease, - InstructionClass Class, - SmallPtrSet<Instruction *, 4> - &DependingInstructions, - SmallPtrSet<const BasicBlock *, 4> - &Visited) { - const Value *Arg = GetObjCArg(Autorelease); - - // Check that there are no instructions between the retain and the autorelease - // (such as an autorelease_pop) which may change the count. - CallInst *Retain = 0; - if (Class == IC_AutoreleaseRV) - FindDependencies(RetainAutoreleaseRVDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - else - FindDependencies(RetainAutoreleaseDep, Arg, - Autorelease->getParent(), Autorelease, - DependingInstructions, Visited, PA); - - Visited.clear(); - if (DependingInstructions.size() != 1) { - DependingInstructions.clear(); - return false; - } - - Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin()); - DependingInstructions.clear(); - - if (!Retain || - GetBasicInstructionClass(Retain) != IC_Retain || - GetObjCArg(Retain) != Arg) - return false; - - Changed = true; - ++NumPeeps; - - if (Class == IC_AutoreleaseRV) - Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent())); - else - Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent())); - - EraseInstruction(Autorelease); - return true; -} - -/// ContractRelease - Attempt to merge an objc_release with a store, load, and -/// objc_retain to form an objc_storeStrong. This can be a little tricky because -/// the instructions don't always appear in order, and there may be unrelated -/// intervening instructions. -void ObjCARCContract::ContractRelease(Instruction *Release, - inst_iterator &Iter) { - LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release)); - if (!Load || !Load->isSimple()) return; - - // For now, require everything to be in one basic block. - BasicBlock *BB = Release->getParent(); - if (Load->getParent() != BB) return; - - // Walk down to find the store and the release, which may be in either order. - BasicBlock::iterator I = Load, End = BB->end(); - ++I; - AliasAnalysis::Location Loc = AA->getLocation(Load); - StoreInst *Store = 0; - bool SawRelease = false; - for (; !Store || !SawRelease; ++I) { - if (I == End) - return; - - Instruction *Inst = I; - if (Inst == Release) { - SawRelease = true; - continue; - } - - InstructionClass Class = GetBasicInstructionClass(Inst); - - // Unrelated retains are harmless. 
- if (IsRetain(Class)) - continue; - - if (Store) { - // The store is the point where we're going to put the objc_storeStrong, - // so make sure there are no uses after it. - if (CanUse(Inst, Load, PA, Class)) - return; - } else if (AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod) { - // We are moving the load down to the store, so check for anything - // else which writes to the memory between the load and the store. - Store = dyn_cast<StoreInst>(Inst); - if (!Store || !Store->isSimple()) return; - if (Store->getPointerOperand() != Loc.Ptr) return; - } - } - - Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand()); - - // Walk up to find the retain. - I = Store; - BasicBlock::iterator Begin = BB->begin(); - while (I != Begin && GetBasicInstructionClass(I) != IC_Retain) - --I; - Instruction *Retain = I; - if (GetBasicInstructionClass(Retain) != IC_Retain) return; - if (GetObjCArg(Retain) != New) return; - - Changed = true; - ++NumStoreStrongs; - - LLVMContext &C = Release->getContext(); - Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C)); - Type *I8XX = PointerType::getUnqual(I8X); - - Value *Args[] = { Load->getPointerOperand(), New }; - if (Args[0]->getType() != I8XX) - Args[0] = new BitCastInst(Args[0], I8XX, "", Store); - if (Args[1]->getType() != I8X) - Args[1] = new BitCastInst(Args[1], I8X, "", Store); - CallInst *StoreStrong = - CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()), - Args, "", Store); - StoreStrong->setDoesNotThrow(); - StoreStrong->setDebugLoc(Store->getDebugLoc()); - - // We can't set the tail flag yet, because we haven't yet determined - // whether there are any escaping allocas. Remember this call, so that - // we can set the tail flag once we know it's safe. - StoreStrongCalls.insert(StoreStrong); - - if (&*Iter == Store) ++Iter; - Store->eraseFromParent(); - Release->eraseFromParent(); - EraseInstruction(Retain); - if (Load->use_empty()) - Load->eraseFromParent(); -} - -bool ObjCARCContract::doInitialization(Module &M) { - // If nothing in the Module uses ARC, don't do anything. - Run = ModuleHasARC(M); - if (!Run) - return false; - - // These are initialized lazily. - StoreStrongCallee = 0; - RetainAutoreleaseCallee = 0; - RetainAutoreleaseRVCallee = 0; - - // Initialize RetainRVMarker. - RetainRVMarker = 0; - if (NamedMDNode *NMD = - M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker")) - if (NMD->getNumOperands() == 1) { - const MDNode *N = NMD->getOperand(0); - if (N->getNumOperands() == 1) - if (const MDString *S = dyn_cast<MDString>(N->getOperand(0))) - RetainRVMarker = S; - } - - return false; -} - -bool ObjCARCContract::runOnFunction(Function &F) { - if (!EnableARCOpts) - return false; - - // If nothing in the Module uses ARC, don't do anything. - if (!Run) - return false; - - Changed = false; - AA = &getAnalysis<AliasAnalysis>(); - DT = &getAnalysis<DominatorTree>(); - - PA.setAA(&getAnalysis<AliasAnalysis>()); - - // Track whether it's ok to mark objc_storeStrong calls with the "tail" - // keyword. Be conservative if the function has variadic arguments. - // It seems that functions which "return twice" are also unsafe for the - // "tail" argument, because they are setjmp, which could need to - // return to an earlier stack state. - bool TailOkForStoreStrongs = !F.isVarArg() && - !F.callsFunctionThatReturnsTwice(); - - // For ObjC library calls which return their argument, replace uses of the - // argument with uses of the call return value, if it dominates the use. 
This - // reduces register pressure. - SmallPtrSet<Instruction *, 4> DependingInstructions; - SmallPtrSet<const BasicBlock *, 4> Visited; - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) { - Instruction *Inst = &*I++; - - // Only these library routines return their argument. In particular, - // objc_retainBlock does not necessarily return its argument. - InstructionClass Class = GetBasicInstructionClass(Inst); - switch (Class) { - case IC_Retain: - case IC_FusedRetainAutorelease: - case IC_FusedRetainAutoreleaseRV: - break; - case IC_Autorelease: - case IC_AutoreleaseRV: - if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited)) - continue; - break; - case IC_RetainRV: { - // If we're compiling for a target which needs a special inline-asm - // marker to do the retainAutoreleasedReturnValue optimization, - // insert it now. - if (!RetainRVMarker) - break; - BasicBlock::iterator BBI = Inst; - BasicBlock *InstParent = Inst->getParent(); - - // Step up to see if the call immediately precedes the RetainRV call. - // If it's an invoke, we have to cross a block boundary. And we have - // to carefully dodge no-op instructions. - do { - if (&*BBI == InstParent->begin()) { - BasicBlock *Pred = InstParent->getSinglePredecessor(); - if (!Pred) - goto decline_rv_optimization; - BBI = Pred->getTerminator(); - break; - } - --BBI; - } while (isNoopInstruction(BBI)); - - if (&*BBI == GetObjCArg(Inst)) { - Changed = true; - InlineAsm *IA = - InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()), - /*isVarArg=*/false), - RetainRVMarker->getString(), - /*Constraints=*/"", /*hasSideEffects=*/true); - CallInst::Create(IA, "", Inst); - } - decline_rv_optimization: - break; - } - case IC_InitWeak: { - // objc_initWeak(p, null) => *p = null - CallInst *CI = cast<CallInst>(Inst); - if (isNullOrUndef(CI->getArgOperand(1))) { - Value *Null = - ConstantPointerNull::get(cast<PointerType>(CI->getType())); - Changed = true; - new StoreInst(Null, CI->getArgOperand(0), CI); - CI->replaceAllUsesWith(Null); - CI->eraseFromParent(); - } - continue; - } - case IC_Release: - ContractRelease(Inst, I); - continue; - case IC_User: - // Be conservative if the function has any alloca instructions. - // Technically we only care about escaping alloca instructions, - // but this is sufficient to handle some interesting cases. - if (isa<AllocaInst>(Inst)) - TailOkForStoreStrongs = false; - continue; - default: - continue; - } - - // Don't use GetObjCArg because we don't want to look through bitcasts - // and such; to do the replacement, the argument must have type i8*. - const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0); - for (;;) { - // If we're compiling bugpointed code, don't get in trouble. - if (!isa<Instruction>(Arg) && !isa<Argument>(Arg)) - break; - // Look through the uses of the pointer. - for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end(); - UI != UE; ) { - Use &U = UI.getUse(); - unsigned OperandNo = UI.getOperandNo(); - ++UI; // Increment UI now, because we may unlink its element. - - // If the call's return value dominates a use of the call's argument - // value, rewrite the use to use the return value. We check for - // reachability here because an unreachable call is considered to - // trivially dominate itself, which would lead us to rewriting its - // argument in terms of its return value, which would lead to - // infinite loops in GetObjCArg. 
- if (DT->isReachableFromEntry(U) && DT->dominates(Inst, U)) { - Changed = true; - Instruction *Replacement = Inst; - Type *UseTy = U.get()->getType(); - if (PHINode *PHI = dyn_cast<PHINode>(U.getUser())) { - // For PHI nodes, insert the bitcast in the predecessor block. - unsigned ValNo = PHINode::getIncomingValueNumForOperand(OperandNo); - BasicBlock *BB = PHI->getIncomingBlock(ValNo); - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - &BB->back()); - // While we're here, rewrite all edges for this PHI, rather - // than just one use at a time, to minimize the number of - // bitcasts we emit. - for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) - if (PHI->getIncomingBlock(i) == BB) { - // Keep the UI iterator valid. - if (&PHI->getOperandUse( - PHINode::getOperandNumForIncomingValue(i)) == - &UI.getUse()) - ++UI; - PHI->setIncomingValue(i, Replacement); - } - } else { - if (Replacement->getType() != UseTy) - Replacement = new BitCastInst(Replacement, UseTy, "", - cast<Instruction>(U.getUser())); - U.set(Replacement); - } - } - } - - // If Arg is a no-op casted pointer, strip one level of casts and iterate. - if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg)) - Arg = BI->getOperand(0); - else if (isa<GEPOperator>(Arg) && - cast<GEPOperator>(Arg)->hasAllZeroIndices()) - Arg = cast<GEPOperator>(Arg)->getPointerOperand(); - else if (isa<GlobalAlias>(Arg) && - !cast<GlobalAlias>(Arg)->mayBeOverridden()) - Arg = cast<GlobalAlias>(Arg)->getAliasee(); - else - break; - } - } - - // If this function has no escaping allocas or suspicious vararg usage, - // objc_storeStrong calls can be marked with the "tail" keyword. - if (TailOkForStoreStrongs) - for (SmallPtrSet<CallInst *, 8>::iterator I = StoreStrongCalls.begin(), - E = StoreStrongCalls.end(); I != E; ++I) - (*I)->setTailCall(); - StoreStrongCalls.clear(); - - return Changed; -} diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 569439aaf4..0da3746950 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -28,12 +28,12 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 5524e01230..07f540a301 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -19,11 +19,11 @@ #define DEBUG_TYPE "reg2mem" #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Transforms/Utils/Local.h" diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 28aaddc50e..e30a2746b0 
100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -26,11 +26,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/InstVisitor.h" -#include "llvm/Instructions.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -271,13 +271,6 @@ public: return I->second; } - /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const { - DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I = - StructValueState.find(std::make_pair(V, i)); - assert(I != StructValueState.end() && "V is not in valuemap!"); - return I->second; - }*/ - /// getTrackedRetVals - Get the inferred return value map. /// const DenseMap<Function*, LatticeVal> &getTrackedRetVals() { @@ -710,9 +703,6 @@ void SCCPSolver::visitPHINode(PHINode &PN) { markConstant(&PN, OperandVal); // Acquire operand value } - - - void SCCPSolver::visitReturnInst(ReturnInst &I) { if (I.getNumOperands() == 0) return; // ret void @@ -1185,7 +1175,7 @@ void SCCPSolver::Solve() { DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n'); // "I" got into the work list because it either made the transition from - // bottom to constant + // bottom to constant, or to overdefined. // // Anything on this worklist that is overdefined need not be visited // since all of its users will have already been marked as overdefined diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 1c220ca0f6..810a553c74 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -33,24 +33,22 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" #include "llvm/DIBuilder.h" -#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/IRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/InstVisitor.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" @@ -411,9 +409,9 @@ static Value *foldSelectInst(SelectInst &SI) { // early on. if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) return SI.getOperand(1+CI->isZero()); - if (SI.getOperand(1) == SI.getOperand(2)) { + if (SI.getOperand(1) == SI.getOperand(2)) return SI.getOperand(1); - } + return 0; } @@ -621,7 +619,7 @@ private: } // Disable SRoA for any intrinsics except for lifetime invariants. - // FIXME: What about debug instrinsics? This matches old behavior, but + // FIXME: What about debug intrinsics? This matches old behavior, but // doesn't make sense. 
void visitIntrinsicInst(IntrinsicInst &II) { if (!IsOffsetKnown) @@ -1099,13 +1097,12 @@ Type *AllocaPartitioning::getCommonType(iterator I) const { continue; Type *UserTy = 0; - if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) { + if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) UserTy = LI->getType(); - } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { + else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) UserTy = SI->getValueOperand()->getType(); - } else { + else return 0; // Bail if we have weird uses. - } if (IntegerType *ITy = dyn_cast<IntegerType>(UserTy)) { // If the type is larger than the partition, skip it. We only encounter @@ -1141,8 +1138,7 @@ void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, StringRef Indent) const { - for (const_use_iterator UI = use_begin(I), UE = use_end(I); - UI != UE; ++UI) { + for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { if (!UI->U) continue; // Skip dead uses. OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " @@ -1170,8 +1166,7 @@ void AllocaPartitioning::print(raw_ostream &OS) const { } OS << "Partitioning of alloca: " << AI << "\n"; - unsigned Num = 0; - for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { print(OS, I); printUsers(OS, I); } @@ -1242,7 +1237,7 @@ public: for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(), E = DVIs.end(); I != E; ++I) { DbgValueInst *DVI = *I; - Value *Arg = NULL; + Value *Arg = 0; if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { // If an argument is zero extended then use argument directly. The ZExt // may be zapped by an optimization pass in future. @@ -1277,7 +1272,7 @@ namespace { /// 1) It takes allocations of aggregates and analyzes the ways in which they /// are used to try to split them into smaller allocations, ideally of /// a single scalar data type. It will split up memcpy and memset accesses -/// as necessary and try to isolate invidual scalar accesses. +/// as necessary and try to isolate individual scalar accesses. /// 2) It will transform accesses into forms which are suitable for SSA value /// promotion. This can be replacing a memset with a scalar store of an /// integer value, or it can involve speculating operations on a PHI or @@ -1439,8 +1434,7 @@ private: // We can only transform this if it is safe to push the loads into the // predecessor blocks. The only thing to watch out for is that we can't put // a possibly trapping load in the predecessor if it is a critical edge. - for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; - ++Idx) { + for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); Value *InVal = PN.getIncomingValue(Idx); @@ -1483,7 +1477,7 @@ private: PN.getName() + ".sroa.speculated"); // Get the TBAA tag and alignment to use from one of the loads. It doesn't - // matter which one we get and if any differ, it doesn't matter. + // matter which one we get and if any differ. 
LoadInst *SomeLoad = cast<LoadInst>(Loads.back()); MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); unsigned Align = SomeLoad->getAlignment(); @@ -1576,13 +1570,13 @@ private: void visitSelectInst(SelectInst &SI) { DEBUG(dbgs() << " original: " << SI << "\n"); - IRBuilder<> IRB(&SI); // If the select isn't safe to speculate, just use simple logic to emit it. SmallVector<LoadInst *, 4> Loads; if (!isSafeSelectToSpeculate(SI, Loads)) return; + IRBuilder<> IRB(&SI); Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; AllocaPartitioning::iterator PIs[2]; AllocaPartitioning::PartitionUse PUs[2]; @@ -1642,44 +1636,6 @@ private: }; } -/// \brief Accumulate the constant offsets in a GEP into a single APInt offset. -/// -/// If the provided GEP is all-constant, the total byte offset formed by the -/// GEP is computed and Offset is set to it. If the GEP has any non-constant -/// operands, the function returns false and the value of Offset is unmodified. -static bool accumulateGEPOffsets(const DataLayout &TD, GEPOperator &GEP, - APInt &Offset) { - APInt GEPOffset(Offset.getBitWidth(), 0); - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); - if (!OpC) - return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast<StructType>(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD.getStructLayout(STy); - GEPOffset += APInt(Offset.getBitWidth(), - SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(Offset.getBitWidth(), - TD.getTypeAllocSize(GTI.getIndexedType())); - if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) { - assert((VTy->getScalarSizeInBits() % 8) == 0 && - "vector element size is not a multiple of 8, cannot GEP over it"); - TypeSize = VTy->getScalarSizeInBits() / 8; - } - - GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize; - } - Offset = GEPOffset; - return true; -} - /// \brief Build a GEP out of a base pointer and indices. /// /// This will return the BasePtr if that is valid, or build a new GEP @@ -1762,7 +1718,7 @@ static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const DataLayout &TD, // extremely poorly defined currently. The long-term goal is to remove GEPing // over a vector from the IR completely. if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { - unsigned ElementSizeInBits = VecTy->getScalarSizeInBits(); + unsigned ElementSizeInBits = TD.getTypeSizeInBits(VecTy->getScalarType()); if (ElementSizeInBits % 8) return 0; // GEPs over non-multiple of 8 size vector elements are invalid. APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); @@ -1854,7 +1810,7 @@ static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const DataLayout &TD, /// The strategy for finding the more natural GEPs is to peel off layers of the /// pointer, walking back through bit casts and GEPs, searching for a base /// pointer from which we can compute a natural GEP with the desired -/// properities. The algorithm tries to fold as many constant indices into +/// properties. The algorithm tries to fold as many constant indices into /// a single GEP as possible, thus making each GEP more independent of the /// surrounding code. 
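The helper removed above folded a GEP's all-constant indices into one byte offset by adding each struct field's offset from the StructLayout and each array index scaled by the indexed type's alloc size; callers now use the equivalent GEPOperator::accumulateConstantOffset (see the hunk below). A standalone sketch of the same accumulation over a hypothetical layout — the struct, offsets, and sizes are invented for illustration, not taken from the patch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical layout for: struct S { i32 a; [4 x i64] b; }
      // (a at byte 0, b at byte 8, alloc size of S assumed to be 40).
      const uint64_t FieldOffsetB = 8;
      const uint64_t I64AllocSize = 8;
      const uint64_t StructAllocSize = 40;

      // Byte offset of: getelementptr %S* %p, i64 0, i32 1, i64 2
      uint64_t Offset = 0;
      Offset += 0 * StructAllocSize; // leading index scales by sizeof(S)
      Offset += FieldOffsetB;        // struct index adds the field's offset
      Offset += 2 * I64AllocSize;    // array index scales by the element size
      std::printf("constant GEP offset = %llu bytes\n",
                  (unsigned long long)Offset); // prints 24
      return 0;
    }
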
static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, @@ -1882,7 +1838,7 @@ static Value *getAdjustedPtr(IRBuilder<> &IRB, const DataLayout &TD, // First fold any existing GEPs into the offset. while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) { APInt GEPOffset(Offset.getBitWidth(), 0); - if (!accumulateGEPOffsets(TD, *GEP, GEPOffset)) + if (!GEP->accumulateConstantOffset(TD, GEPOffset)) break; Offset += GEPOffset; Ptr = GEP->getPointerOperand(); @@ -2009,15 +1965,14 @@ static bool isVectorPromotionViable(const DataLayout &TD, if (!Ty) return false; - uint64_t VecSize = TD.getTypeSizeInBits(Ty); - uint64_t ElementSize = Ty->getScalarSizeInBits(); + uint64_t ElementSize = TD.getTypeSizeInBits(Ty->getScalarType()); // While the definition of LLVM vectors is bitpacked, we don't support sizes // that aren't byte sized. if (ElementSize % 8) return false; - assert((VecSize % 8) == 0 && "vector size not a multiple of element size?"); - VecSize /= 8; + assert((TD.getTypeSizeInBits(Ty) % 8) == 0 && + "vector size not a multiple of element size?"); ElementSize /= 8; for (; I != E; ++I) { @@ -2101,9 +2056,9 @@ static bool isIntegerWideningViable(const DataLayout &TD, uint64_t Size = TD.getTypeStoreSize(AllocaTy); - // Check the uses to ensure the uses are (likely) promoteable integer uses. + // Check the uses to ensure the uses are (likely) promotable integer uses. // Also ensure that the alloca has a covering load or store. We don't want - // to widen the integer operotains only to fail to promote due to some other + // to widen the integer operations only to fail to promote due to some other // unsplittable entry (which we may make splittable later). bool WholeAllocaOp = false; for (; I != E; ++I) { @@ -2150,7 +2105,7 @@ static bool isIntegerWideningViable(const DataLayout &TD, !canConvertValue(TD, ValueTy, AllocaTy)) return false; } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) { - if (MI->isVolatile()) + if (MI->isVolatile() || !isa<Constant>(MI->getLength())) return false; if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) { const AllocaPartitioning::MemTransferOffsets &MTO @@ -2223,6 +2178,84 @@ static Value *insertInteger(const DataLayout &DL, IRBuilder<> &IRB, Value *Old, return V; } +static Value *extractVector(IRBuilder<> &IRB, Value *V, + unsigned BeginIndex, unsigned EndIndex, + const Twine &Name) { + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + if (NumElements == VecTy->getNumElements()) + return V; + + if (NumElements == 1) { + V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), + Name + ".extract"); + DEBUG(dbgs() << " extract: " << *V << "\n"); + return V; + } + + SmallVector<Constant*, 8> Mask; + Mask.reserve(NumElements); + for (unsigned i = BeginIndex; i != EndIndex; ++i) + Mask.push_back(IRB.getInt32(i)); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".extract"); + DEBUG(dbgs() << " shuffle: " << *V << "\n"); + return V; +} + +static Value *insertVector(IRBuilder<> &IRB, Value *Old, Value *V, + unsigned BeginIndex, const Twine &Name) { + VectorType *VecTy = cast<VectorType>(Old->getType()); + assert(VecTy && "Can only insert a vector into a vector"); + + VectorType *Ty = dyn_cast<VectorType>(V->getType()); + if (!Ty) { + // Single element to insert. 
+ V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex), + Name + ".insert"); + DEBUG(dbgs() << " insert: " << *V << "\n"); + return V; + } + + assert(Ty->getNumElements() <= VecTy->getNumElements() && + "Too many elements!"); + if (Ty->getNumElements() == VecTy->getNumElements()) { + assert(V->getType() == VecTy && "Vector type mismatch"); + return V; + } + unsigned EndIndex = BeginIndex + Ty->getNumElements(); + + // When inserting a smaller vector into the larger to store, we first + // use a shuffle vector to widen it with undef elements, and then + // a second shuffle vector to select between the loaded vector and the + // incoming vector. + SmallVector<Constant*, 8> Mask; + Mask.reserve(VecTy->getNumElements()); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i - BeginIndex)); + else + Mask.push_back(UndefValue::get(IRB.getInt32Ty())); + V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), + ConstantVector::get(Mask), + Name + ".expand"); + DEBUG(dbgs() << " shuffle1: " << *V << "\n"); + + Mask.clear(); + for (unsigned i = 0; i != VecTy->getNumElements(); ++i) + if (i >= BeginIndex && i < EndIndex) + Mask.push_back(IRB.getInt32(i)); + else + Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); + V = IRB.CreateShuffleVector(V, Old, ConstantVector::get(Mask), + Name + "insert"); + DEBUG(dbgs() << " shuffle2: " << *V << "\n"); + return V; +} + namespace { /// \brief Visitor to rewrite instructions using a partition of an alloca to /// use a new alloca. @@ -2244,7 +2277,7 @@ class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter, // If we are rewriting an alloca partition which can be written as pure // vector operations, we stash extra information here. When VecTy is - // non-null, we have some strict guarantees about the rewriten alloca: + // non-null, we have some strict guarantees about the rewritten alloca: // - The new alloca is exactly the size of the vector type here. // - The accesses all either map to the entire vector or to a single // element. 
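The insertVector helper added above blends a narrow vector into a slice of a wider one with two shufflevectors: the first widens the payload to the full width with undef lanes, the second picks each lane from either the widened payload or the old value. A standalone sketch that only computes the two masks for a hypothetical case (a 2-element payload inserted at index 2 of an 8-element vector; -1 marks an undef lane):

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumElts = 8, BeginIndex = 2, PayloadElts = 2;
      const unsigned EndIndex = BeginIndex + PayloadElts;

      std::vector<int> WidenMask, BlendMask;
      for (unsigned i = 0; i != NumElts; ++i) {
        bool InSlice = i >= BeginIndex && i < EndIndex;
        // First shuffle: payload lanes land at [BeginIndex, EndIndex), rest undef.
        WidenMask.push_back(InSlice ? int(i - BeginIndex) : -1);
        // Second shuffle: in-slice lanes come from the widened payload (operand 0),
        // all other lanes from the old vector (operand 1, lanes NumElts and up).
        BlendMask.push_back(InSlice ? int(i) : int(i + NumElts));
      }

      for (int M : WidenMask) std::printf("%d ", M); // -1 -1 0 1 -1 -1 -1 -1
      std::printf("\n");
      for (int M : BlendMask) std::printf("%d ", M); // 8 9 2 3 12 13 14 15
      std::printf("\n");
      return 0;
    }
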
@@ -2292,9 +2325,9 @@ public: ++NumVectorized; VecTy = cast<VectorType>(NewAI.getAllocatedType()); ElementTy = VecTy->getElementType(); - assert((VecTy->getScalarSizeInBits() % 8) == 0 && + assert((TD.getTypeSizeInBits(VecTy->getScalarType()) % 8) == 0 && "Only multiple-of-8 sized vector elements are viable"); - ElementSize = VecTy->getScalarSizeInBits() / 8; + ElementSize = TD.getTypeSizeInBits(VecTy->getScalarType()) / 8; } else if (isIntegerWideningViable(TD, NewAI.getAllocatedType(), NewAllocaBeginOffset, P, I, E)) { IntTy = Type::getIntNTy(NewAI.getContext(), @@ -2388,29 +2421,14 @@ private: Pass.DeadInsts.insert(I); } - Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { - Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); + Value *rewriteVectorizedLoadInst(IRBuilder<> &IRB) { unsigned BeginIndex = getIndex(BeginOffset); unsigned EndIndex = getIndex(EndOffset); assert(EndIndex > BeginIndex && "Empty vector!"); - unsigned NumElements = EndIndex - BeginIndex; - assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); - if (NumElements == 1) { - V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex), - getName(".extract")); - DEBUG(dbgs() << " extract: " << *V << "\n"); - } else if (NumElements < VecTy->getNumElements()) { - SmallVector<Constant*, 8> Mask; - Mask.reserve(NumElements); - for (unsigned i = BeginIndex; i != EndIndex; ++i) - Mask.push_back(IRB.getInt32(i)); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".extract")); - DEBUG(dbgs() << " shuffle: " << *V << "\n"); - } - return V; + + Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + return extractVector(IRB, V, BeginIndex, EndIndex, getName(".vec")); } Value *rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { @@ -2431,7 +2449,6 @@ private: DEBUG(dbgs() << " original: " << LI << "\n"); Value *OldOp = LI.getOperand(0); assert(OldOp == OldPtr); - IRBuilder<> IRB(&LI); uint64_t Size = EndOffset - BeginOffset; bool IsSplitIntLoad = Size < TD.getTypeStoreSize(LI.getType()); @@ -2452,12 +2469,13 @@ private: return true; } + IRBuilder<> IRB(&LI); Type *TargetTy = IsSplitIntLoad ? Type::getIntNTy(LI.getContext(), Size * 8) : LI.getType(); bool IsPtrAdjusted = false; Value *V; if (VecTy) { - V = rewriteVectorizedLoadInst(IRB, LI, OldOp); + V = rewriteVectorizedLoadInst(IRB); } else if (IntTy && LI.getType()->isIntegerTy()) { V = rewriteIntegerLoad(IRB, LI); } else if (BeginOffset == NewAllocaBeginOffset && @@ -2518,44 +2536,12 @@ private: : VectorType::get(ElementTy, NumElements); if (V->getType() != PartitionTy) V = convertValue(TD, IRB, V, PartitionTy); - if (NumElements < VecTy->getNumElements()) { - // We need to mix in the existing elements. - LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), - getName(".load")); - if (NumElements == 1) { - V = IRB.CreateInsertElement(LI, V, IRB.getInt32(BeginIndex), - getName(".insert")); - DEBUG(dbgs() << " insert: " << *V << "\n"); - } else { - // When inserting a smaller vector into the larger to store, we first - // use a shuffle vector to widen it with undef elements, and then - // a second shuffle vector to select between the loaded vector and the - // incoming vector. 
- SmallVector<Constant*, 8> Mask; - Mask.reserve(VecTy->getNumElements()); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i - BeginIndex)); - else - Mask.push_back(UndefValue::get(IRB.getInt32Ty())); - V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()), - ConstantVector::get(Mask), - getName(".expand")); - DEBUG(dbgs() << " shuffle1: " << *V << "\n"); - - Mask.clear(); - for (unsigned i = 0; i != VecTy->getNumElements(); ++i) - if (i >= BeginIndex && i < EndIndex) - Mask.push_back(IRB.getInt32(i)); - else - Mask.push_back(IRB.getInt32(i + VecTy->getNumElements())); - V = IRB.CreateShuffleVector(V, LI, ConstantVector::get(Mask), - getName("insert")); - DEBUG(dbgs() << " shuffle2: " << *V << "\n"); - } - } else { - V = convertValue(TD, IRB, V, VecTy); - } + + // Mix in the existing elements. + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + V = insertVector(IRB, Old, V, BeginIndex, getName(".vec")); + StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); Pass.DeadInsts.insert(&SI); @@ -2607,7 +2593,7 @@ private: TD.getTypeStoreSizeInBits(V->getType()) && "Non-byte-multiple bit width"); assert(V->getType()->getIntegerBitWidth() == - TD.getTypeSizeInBits(OldAI.getAllocatedType()) && + TD.getTypeAllocSizeInBits(OldAI.getAllocatedType()) && "Only alloca-wide stores can be split and recomposed"); IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), Size * 8); V = extractInteger(TD, IRB, V, NarrowTy, BeginOffset, @@ -2639,6 +2625,40 @@ private: return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile(); } + /// \brief Compute an integer value from splatting an i8 across the given + /// number of bytes. + /// + /// Note that this routine assumes an i8 is a byte. If that isn't true, don't + /// call this routine. + /// FIXME: Heed the advice above. + /// + /// \param V The i8 value to splat. + /// \param Size The number of bytes in the output (assuming i8 is one byte) + Value *getIntegerSplat(IRBuilder<> &IRB, Value *V, unsigned Size) { + assert(Size > 0 && "Expected a positive number of bytes."); + IntegerType *VTy = cast<IntegerType>(V->getType()); + assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte"); + if (Size == 1) + return V; + + Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); + V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), + ConstantExpr::getUDiv( + Constant::getAllOnesValue(SplatIntTy), + ConstantExpr::getZExt( + Constant::getAllOnesValue(V->getType()), + SplatIntTy)), + getName(".isplat")); + return V; + } + + /// \brief Compute a vector splat for a given element value. 
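getIntegerSplat above widens the memset byte by multiplying its zero extension with AllOnes(SplatIntTy) / zext(AllOnes(i8)), i.e. the 0x0101...01 pattern, which replicates the byte into every byte of the wider integer. A standalone sketch of that arithmetic with hypothetical values (a 4-byte splat of 0xAB):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t Byte = 0xAB;               // the i8 value, zero-extended
      const uint32_t Ones = UINT32_MAX / 0xFFu; // 0xFFFFFFFF / 0xFF == 0x01010101
      const uint32_t Splat = Byte * Ones;       // replicate the byte

      std::printf("0x%02X splat over 4 bytes = 0x%08X\n", Byte, Splat); // 0xABABABAB
      return 0;
    }
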
+ Value *getVectorSplat(IRBuilder<> &IRB, Value *V, unsigned NumElements) { + V = IRB.CreateVectorSplat(NumElements, V, NamePrefix); + DEBUG(dbgs() << " splat: " << *V << "\n"); + return V; + } + bool visitMemSetInst(MemSetInst &II) { DEBUG(dbgs() << " original: " << II << "\n"); IRBuilder<> IRB(&II); @@ -2667,7 +2687,8 @@ private: (BeginOffset != NewAllocaBeginOffset || EndOffset != NewAllocaEndOffset || !AllocaTy->isSingleValueType() || - !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { + !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)) || + TD.getTypeSizeInBits(ScalarTy)%8 != 0)) { Type *SizeTy = II.getLength()->getType(); Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); CallInst *New @@ -2683,53 +2704,62 @@ private: // If we can represent this as a simple value, we have to build the actual // value to store, which requires expanding the byte present in memset to // a sensible representation for the alloca type. This is essentially - // splatting the byte to a sufficiently wide integer, bitcasting to the - // desired scalar type, and splatting it across any desired vector type. - uint64_t Size = EndOffset - BeginOffset; - Value *V = II.getValue(); - IntegerType *VTy = cast<IntegerType>(V->getType()); - Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size*8); - if (Size*8 > VTy->getBitWidth()) - V = IRB.CreateMul(IRB.CreateZExt(V, SplatIntTy, getName(".zext")), - ConstantExpr::getUDiv( - Constant::getAllOnesValue(SplatIntTy), - ConstantExpr::getZExt( - Constant::getAllOnesValue(V->getType()), - SplatIntTy)), - getName(".isplat")); - - // If this is an element-wide memset of a vectorizable alloca, insert it. - if (VecTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - if (V->getType() != ScalarTy) - V = convertValue(TD, IRB, V, ScalarTy); - StoreInst *Store = IRB.CreateAlignedStore( - IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, - NewAI.getAlignment(), - getName(".load")), - V, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")), - &NewAI, NewAI.getAlignment()); - (void)Store; - DEBUG(dbgs() << " to: " << *Store << "\n"); - return true; - } + // splatting the byte to a sufficiently wide integer, splatting it across + // any desired vector width, and bitcasting to the final type. + Value *V; + + if (VecTy) { + // If this is a memset of a vectorized alloca, insert it. + assert(ElementTy == ScalarTy); + + unsigned BeginIndex = getIndex(BeginOffset); + unsigned EndIndex = getIndex(EndOffset); + assert(EndIndex > BeginIndex && "Empty vector!"); + unsigned NumElements = EndIndex - BeginIndex; + assert(NumElements <= VecTy->getNumElements() && "Too many elements!"); + + Value *Splat = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ElementTy)/8); + Splat = convertValue(TD, IRB, Splat, ElementTy); + if (NumElements > 1) + Splat = getVectorSplat(IRB, Splat, NumElements); - // If this is a memset on an alloca where we can widen stores, insert the - // set integer. 
- if (IntTy && (BeginOffset > NewAllocaBeginOffset || - EndOffset < NewAllocaEndOffset)) { - assert(!II.isVolatile()); Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); - Old = convertValue(TD, IRB, Old, IntTy); - assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); - uint64_t Offset = BeginOffset - NewAllocaBeginOffset; - V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); - } + V = insertVector(IRB, Old, Splat, BeginIndex, getName(".vec")); + } else if (IntTy) { + // If this is a memset on an alloca where we can widen stores, insert the + // set integer. + assert(!II.isVolatile()); + + uint64_t Size = EndOffset - BeginOffset; + V = getIntegerSplat(IRB, II.getValue(), Size); + + if (IntTy && (BeginOffset != NewAllocaBeginOffset || + EndOffset != NewAllocaBeginOffset)) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Old = convertValue(TD, IRB, Old, IntTy); + assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); + uint64_t Offset = BeginOffset - NewAllocaBeginOffset; + V = insertInteger(TD, IRB, Old, V, Offset, getName(".insert")); + } else { + assert(V->getType() == IntTy && + "Wrong type for an alloca wide integer!"); + } + V = convertValue(TD, IRB, V, AllocaTy); + } else { + // Established these invariants above. + assert(BeginOffset == NewAllocaBeginOffset); + assert(EndOffset == NewAllocaEndOffset); + + V = getIntegerSplat(IRB, II.getValue(), + TD.getTypeSizeInBits(ScalarTy)/8); + if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy)) + V = getVectorSplat(IRB, V, AllocaVecTy->getNumElements()); - if (V->getType() != AllocaTy) V = convertValue(TD, IRB, V, AllocaTy); + } Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), II.isVolatile()); @@ -2814,37 +2844,22 @@ private: // Record this instruction for deletion. Pass.DeadInsts.insert(&II); - bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && - EndOffset == NewAllocaEndOffset; - bool IsVectorElement = VecTy && !IsWholeAlloca; - uint64_t Size = EndOffset - BeginOffset; - IntegerType *SubIntTy - = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; - - Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() - : II.getRawDest()->getType(); - if (!EmitMemCpy) { - if (IsVectorElement) - OtherPtrTy = VecTy->getElementType()->getPointerTo(); - else if (IntTy && !IsWholeAlloca) - OtherPtrTy = SubIntTy->getPointerTo(); - else - OtherPtrTy = NewAI.getType(); - } - - // Compute the other pointer, folding as much as possible to produce - // a single, simple GEP in most cases. - Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); - OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, - getName("." + OtherPtr->getName())); - // Strip all inbounds GEPs and pointer casts to try to dig out any root // alloca that should be re-examined after rewriting this instruction. + Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); if (AllocaInst *AI = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) Pass.Worklist.insert(AI); if (EmitMemCpy) { + Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() + : II.getRawDest()->getType(); + + // Compute the other pointer, folding as much as possible to produce + // a single, simple GEP in most cases. + OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); + Value *OurPtr = getAdjustedAllocaPtr(IRB, IsDest ? 
II.getRawDest()->getType() : II.getRawSource()->getType()); @@ -2865,18 +2880,38 @@ private: if (!Align) Align = 1; - Value *SrcPtr = OtherPtr; + bool IsWholeAlloca = BeginOffset == NewAllocaBeginOffset && + EndOffset == NewAllocaEndOffset; + uint64_t Size = EndOffset - BeginOffset; + unsigned BeginIndex = VecTy ? getIndex(BeginOffset) : 0; + unsigned EndIndex = VecTy ? getIndex(EndOffset) : 0; + unsigned NumElements = EndIndex - BeginIndex; + IntegerType *SubIntTy + = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0; + + Type *OtherPtrTy = NewAI.getType(); + if (VecTy && !IsWholeAlloca) { + if (NumElements == 1) + OtherPtrTy = VecTy->getElementType(); + else + OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements); + + OtherPtrTy = OtherPtrTy->getPointerTo(); + } else if (IntTy && !IsWholeAlloca) { + OtherPtrTy = SubIntTy->getPointerTo(); + } + + Value *SrcPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, + getName("." + OtherPtr->getName())); Value *DstPtr = &NewAI; if (!IsDest) std::swap(SrcPtr, DstPtr); Value *Src; - if (IsVectorElement && !IsDest) { - // We have to extract rather than load. - Src = IRB.CreateExtractElement( - IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), - IRB.getInt32(getIndex(BeginOffset)), - getName(".copyextract")); + if (VecTy && !IsWholeAlloca && !IsDest) { + Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".load")); + Src = extractVector(IRB, Src, BeginIndex, EndIndex, getName(".vec")); } else if (IntTy && !IsWholeAlloca && !IsDest) { Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")); @@ -2889,7 +2924,11 @@ private: getName(".copyload")); } - if (IntTy && !IsWholeAlloca && IsDest) { + if (VecTy && !IsWholeAlloca && IsDest) { + Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), + getName(".oldload")); + Src = insertVector(IRB, Old, Src, BeginIndex, getName(".vec")); + } else if (IntTy && !IsWholeAlloca && IsDest) { Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".oldload")); Old = convertValue(TD, IRB, Old, IntTy); @@ -2899,14 +2938,6 @@ private: Src = convertValue(TD, IRB, Src, NewAllocaTy); } - if (IsVectorElement && IsDest) { - // We have to insert into a loaded copy before storing. - Src = IRB.CreateInsertElement( - IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), - Src, IRB.getInt32(getIndex(BeginOffset)), - getName(".insert")); - } - StoreInst *Store = cast<StoreInst>( IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); (void)Store; @@ -2934,6 +2965,7 @@ private: else New = IRB.CreateLifetimeEnd(Ptr, Size); + (void)New; DEBUG(dbgs() << " to: " << *New << "\n"); return true; } @@ -3110,9 +3142,8 @@ private: void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { assert(Ty->isSingleValueType()); // Load the single value and insert it using the indices. - Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices, - Name + ".gep"), - Name + ".load"); + Value *GEP = IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep"); + Value *Load = IRB.CreateLoad(GEP, Name + ".load"); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); DEBUG(dbgs() << " to: " << *Load << "\n"); } @@ -3385,7 +3416,7 @@ bool SROA::rewriteAllocaPartition(AllocaInst &AI, // Check for the case where we're going to rewrite to a new alloca of the // exact same type as the original, and with the same access offsets. 
In that // case, re-use the existing alloca, but still run through the rewriter to - // performe phi and select speculation. + // perform phi and select speculation. AllocaInst *NewAI; if (AllocaTy == AI.getAllocatedType()) { assert(PI->BeginOffset == 0 && @@ -3552,7 +3583,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) { /// If there is a domtree available, we attempt to promote using the full power /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is /// based on the SSAUpdater utilities. This function returns whether any -/// promotion occured. +/// promotion occurred. bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 762bb15c59..8a9c7da113 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -18,7 +18,7 @@ #include "llvm-c/Transforms/Scalar.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" @@ -50,11 +50,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeLowerAtomicPass(Registry); initializeLowerExpectIntrinsicPass(Registry); initializeMemCpyOptPass(Registry); - initializeObjCARCAliasAnalysisPass(Registry); - initializeObjCARCAPElimPass(Registry); - initializeObjCARCExpandPass(Registry); - initializeObjCARCContractPass(Registry); - initializeObjCARCOptPass(Registry); initializeReassociatePass(Registry); initializeRegToMemPass(Registry); initializeSCCPPass(Registry); diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c8656fbd8e..e590a374ea 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -27,19 +27,19 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" #include "llvm/DIBuilder.h" -#include "llvm/DataLayout.h" #include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9160f04fe2..c243d34fd7 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -26,15 +26,15 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DataLayout.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" 
+#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/TargetTransformInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -48,12 +48,19 @@ namespace { } virtual bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfo>(); + } }; } char CFGSimplifyPass::ID = 0; -INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg", - "Simplify the CFG", false, false) +INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", + false, false) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", + false, false) // Public interface to the CFGSimplification pass FunctionPass *llvm::createCFGSimplificationPass() { @@ -111,13 +118,11 @@ static bool markAliveBlocks(BasicBlock *BB, SmallVector<BasicBlock*, 128> Worklist; Worklist.push_back(BB); + Reachable.insert(BB); bool Changed = false; do { BB = Worklist.pop_back_val(); - if (!Reachable.insert(BB)) - continue; - // Do a quick scan of the basic block, turning any obviously unreachable // instructions into LLVM unreachable insts. The instruction combining pass // canonicalizes unreachable insts into stores to null or undef. @@ -176,7 +181,8 @@ static bool markAliveBlocks(BasicBlock *BB, Changed |= ConstantFoldTerminator(BB, true); for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) - Worklist.push_back(*SI); + if (Reachable.insert(*SI)) + Worklist.push_back(*SI); } while (!Worklist.empty()); return Changed; } @@ -294,8 +300,8 @@ static bool mergeEmptyReturnBlocks(Function &F) { /// iterativelySimplifyCFG - Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. -static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, - const TargetTransformInfo *TTI) { +static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, + const DataLayout *TD) { bool Changed = false; bool LocalChange = true; while (LocalChange) { @@ -304,7 +310,7 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TD, TTI)) { + if (SimplifyCFG(BBIt++, TTI, TD)) { LocalChange = true; ++NumSimpl; } @@ -318,12 +324,11 @@ static bool iterativelySimplifyCFG(Function &F, const DataLayout *TD, // simplify the CFG. // bool CFGSimplifyPass::runOnFunction(Function &F) { + const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); const DataLayout *TD = getAnalysisIfAvailable<DataLayout>(); - const TargetTransformInfo *TTI = - getAnalysisIfAvailable<TargetTransformInfo>(); bool EverChanged = removeUnreachableBlocksFromFn(F); EverChanged |= mergeEmptyReturnBlocks(F); - EverChanged |= iterativelySimplifyCFG(F, TD, TTI); + EverChanged |= iterativelySimplifyCFG(F, TTI, TD); // If neither pass changed anything, we're done. 
if (!EverChanged) return false; @@ -337,7 +342,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) { return true; do { - EverChanged = iterativelySimplifyCFG(F, TD, TTI); + EverChanged = iterativelySimplifyCFG(F, TTI, TD); EverChanged |= removeUnreachableBlocksFromFn(F); } while (EverChanged); diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index d4643b9d80..916b37d4a8 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -23,10 +23,10 @@ #include "llvm/ADT/StringMap.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! -#include "llvm/DataLayout.h" -#include "llvm/IRBuilder.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -165,7 +165,7 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { // Ignore non-calls. CallInst *CI = dyn_cast<CallInst>(I++); - if (!CI) continue; + if (!CI || CI->hasFnAttr(Attribute::NoBuiltin)) continue; // Ignore indirect calls and calls to non-external functions. Function *Callee = CI->getCalledFunction(); diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index cde9c178ad..d4595bb373 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -20,7 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index e357378524..2002e680d1 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -58,12 +58,13 @@ #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CallSite.h" @@ -79,11 +80,15 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); namespace { struct TailCallElim : public FunctionPass { + const TargetTransformInfo *TTI; + static char ID; // Pass identification, replacement for typeid TailCallElim() : FunctionPass(ID) { initializeTailCallElimPass(*PassRegistry::getPassRegistry()); } + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual bool runOnFunction(Function &F); private: @@ -109,14 +114,21 @@ namespace { } char TailCallElim::ID = 0; -INITIALIZE_PASS(TailCallElim, "tailcallelim", - "Tail Call Elimination", false, false) +INITIALIZE_PASS_BEGIN(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) 
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(TailCallElim, "tailcallelim", + "Tail Call Elimination", false, false) // Public interface to the TailCallElimination pass FunctionPass *llvm::createTailCallEliminationPass() { return new TailCallElim(); } +void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfo>(); +} + /// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by /// callees of this function. We only do very simple analysis right now, this /// could be expanded in the future to use mod/ref information for particular @@ -151,6 +163,7 @@ bool TailCallElim::runOnFunction(Function &F) { // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return false; + TTI = &getAnalysis<TargetTransformInfo>(); BasicBlock *OldEntry = 0; bool TailCallsAreMarkedTail = false; SmallVector<PHINode*, 8> ArgumentPHIs; @@ -391,7 +404,8 @@ TailCallElim::FindTRECandidate(Instruction *TI, if (BB == &F->getEntryBlock() && FirstNonDbg(BB->front()) == CI && FirstNonDbg(llvm::next(BB->begin())) == TI && - callIsSmall(CI)) { + CI->getCalledFunction() && + !TTI->isLoweredToCall(CI->getCalledFunction())) { // A single-block function with just a call and a return. Check that // the arguments match. CallSite::arg_iterator I = CallSite(CI).arg_begin(), |