32 files changed, 3030 insertions, 2614 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index bbdd0437a1..5bec8e445a 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -26,19 +26,44 @@ namespace llvm {
   /// Clients should create an instance of this class when rewriting is needed,
   /// and destroy it when finished to allow the release of the associated
   /// memory.
-  struct SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
+  class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
     ScalarEvolution &SE;
     std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
       InsertedExpressions;
     std::set<Value*> InsertedValues;
 
+    /// PostIncLoop - When non-null, expanded addrecs referring to the given
+    /// loop are expanded in post-inc mode. For example, expanding {1,+,1}<L> in
+    /// post-inc mode returns the add instruction that adds one to the phi
+    /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only
+    /// supported in non-canonical mode.
+    const Loop *PostIncLoop;
+
+    /// IVIncInsertLoop - When this is non-null, addrecs expanded in the
+    /// loop it indicates should be inserted with increments at
+    /// IVIncInsertPos.
+    const Loop *IVIncInsertLoop;
+
+    /// IVIncInsertPos - When expanding addrecs in the IVIncInsertLoop loop,
+    /// insert the IV increment at this position.
+    Instruction *IVIncInsertPos;
+
+    /// CanonicalMode - When true, expressions are expanded in "canonical"
+    /// form. In particular, addrecs are expanded as arithmetic based on
+    /// a canonical induction variable. When false, expressions are expanded
+    /// in a more literal form.
+    bool CanonicalMode;
+
+  protected:
     typedef IRBuilder<true, TargetFolder> BuilderType;
     BuilderType Builder;
 
     friend struct SCEVVisitor<SCEVExpander, Value*>;
 
   public:
+    /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
     explicit SCEVExpander(ScalarEvolution &se)
-      : SE(se), Builder(se.getContext(), TargetFolder(se.TD)) {}
+      : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true),
+        Builder(se.getContext(), TargetFolder(se.TD)) {}
 
     /// clear - Erase the contents of the InsertedExpressions map so that users
     /// trying to expand the same expression into multiple BasicBlocks or
@@ -54,11 +79,36 @@ namespace llvm {
     /// expandCodeFor - Insert code to directly compute the specified SCEV
     /// expression into the program. The inserted code is inserted into the
     /// specified block.
-    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *IP) {
+    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I) {
+      BasicBlock::iterator IP = I;
+      while (isInsertedInstruction(IP)) ++IP;
       Builder.SetInsertPoint(IP->getParent(), IP);
       return expandCodeFor(SH, Ty);
     }
 
+    /// setIVIncInsertPos - Set the current IV increment loop and position.
+    void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
+      assert(!CanonicalMode &&
+             "IV increment positions are not supported in CanonicalMode");
+      IVIncInsertLoop = L;
+      IVIncInsertPos = Pos;
+    }
+
+    /// setPostInc - If L is non-null, enable post-inc expansion for addrecs
+    /// referring to the given loop. If L is null, disable post-inc expansion
+    /// completely. Post-inc expansion is only supported in non-canonical
+    /// mode.
+    void setPostInc(const Loop *L) {
+      assert(!CanonicalMode &&
+             "Post-inc expansion is not supported in CanonicalMode");
+      PostIncLoop = L;
+    }
+
+    /// disableCanonicalMode - Disable the behavior of expanding expressions in
+    /// canonical form rather than in a more literal form. Non-canonical mode
+    /// is useful for late optimization passes.
+    void disableCanonicalMode() { CanonicalMode = false; }
+
   private:
     LLVMContext &getContext() const { return SE.getContext(); }
 
@@ -121,6 +171,16 @@ namespace llvm {
     Value *visitUnknown(const SCEVUnknown *S) {
       return S->getValue();
     }
+
+    void rememberInstruction(Value *I) {
+      if (!PostIncLoop) InsertedValues.insert(I);
+    }
+
+    Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
+    PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+                                       const Loop *L,
+                                       const Type *ExpandTy,
+                                       const Type *IntTy);
   };
 }
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 92f00273a2..38611ccb62 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -324,12 +324,6 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
   // the actual replacement value.
   if (U.isUseOfPostIncrementedValue())
     RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
-  // Evaluate the expression out of the loop, if possible.
-  if (!L->contains(U.getUser())) {
-    const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
-    if (ExitVal->isLoopInvariant(L))
-      RetVal = ExitVal;
-  }
   return RetVal;
 }
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 7389007bf2..2f44913abd 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1089,6 +1089,15 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
   if (!isa<SCEVSignExtendExpr>(SExt))
     return SExt;
 
+  // Force the cast to be folded into the operands of an addrec.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+    SmallVector<const SCEV *, 4> Ops;
+    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+         I != E; ++I)
+      Ops.push_back(getAnyExtendExpr(*I, Ty));
+    return getAddRecExpr(Ops, AR->getLoop());
+  }
+
   // If the expression is obviously signed, use the sext cast value.
   if (isa<SCEVSMaxExpr>(Op))
     return SExt;
@@ -1204,6 +1213,17 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVAddExpr operand types don't match!");
 #endif
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (!isKnownNonNegative(Ops[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, LI);
 
@@ -1521,10 +1541,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>();
-  new (S) SCEVAddExpr(ID, Ops);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVAddExpr *S =
+    static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVAddExpr>();
+    new (S) SCEVAddExpr(ID, Ops);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -1535,6 +1558,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         bool HasNUW, bool HasNSW) {
   assert(!Ops.empty() && "Cannot get empty mul!");
+  if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     assert(getEffectiveSCEVType(Ops[i]->getType()) ==
@@ -1542,6 +1566,17 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVMulExpr operand types don't match!");
 #endif
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (!isKnownNonNegative(Ops[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, LI);
 
@@ -1576,6 +1611,22 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
       // If we have a multiply of zero, it will always be zero.
       return Ops[0];
+    } else if (Ops[0]->isAllOnesValue()) {
+      // If we have a mul by -1 of an add, try distributing the -1 among the
+      // add operands.
+      if (Ops.size() == 2)
+        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+          SmallVector<const SCEV *, 4> NewOps;
+          bool AnyFolded = false;
+          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+               I != E; ++I) {
+            const SCEV *Mul = getMulExpr(Ops[0], *I);
+            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+            NewOps.push_back(Mul);
+          }
+          if (AnyFolded)
+            return getAddExpr(NewOps);
+        }
     }
   }
 
@@ -1642,7 +1693,9 @@
 
       // It's tempting to propagate the NSW flag here, but nsw multiplication
       // is not associative so this isn't necessarily safe.
-      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
+                                         HasNUW && AddRec->hasNoUnsignedWrap(),
+                                         /*HasNSW=*/false);
 
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
@@ -1696,10 +1749,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>();
-  new (S) SCEVMulExpr(ID, Ops);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVMulExpr *S =
+    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVMulExpr>();
+    new (S) SCEVMulExpr(ID, Ops);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -1842,10 +1898,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
       return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0}  -->  X
   }
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      if (!isKnownNonNegative(Operands[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Canonicalize nested AddRecs in by nesting them in order of loop depth.
   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
     const Loop *NestedLoop = NestedAR->getLoop();
-    if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
+    if (L->contains(NestedLoop->getHeader()) ?
+        (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+        (!NestedLoop->contains(L->getHeader()) &&
+         DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                   NestedAR->op_end());
       Operands[0] = NestedAR->getStart();
@@ -1884,10 +1954,13 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
     ID.AddPointer(Operands[i]);
   ID.AddPointer(L);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
-  new (S) SCEVAddRecExpr(ID, Operands, L);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVAddRecExpr *S =
+    static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
+    new (S) SCEVAddRecExpr(ID, Operands, L);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -2525,31 +2598,28 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
           if (Accum->isLoopInvariant(L) ||
               (isa<SCEVAddRecExpr>(Accum) &&
                cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+            bool HasNUW = false;
+            bool HasNSW = false;
+
+            // If the increment doesn't overflow, then neither the addrec nor
+            // the post-increment will overflow.
+            if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+              if (OBO->hasNoUnsignedWrap())
+                HasNUW = true;
+              if (OBO->hasNoSignedWrap())
+                HasNSW = true;
+            }
+
             const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
-            const SCEVAddRecExpr *PHISCEV =
-              cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
-
-            // If the increment doesn't overflow, then neither the addrec nor the
-            // post-increment will overflow.
-            if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
-              if (OBO->getOperand(0) == PN &&
-                  getSCEV(OBO->getOperand(1)) ==
-                    PHISCEV->getStepRecurrence(*this)) {
-                const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
-                if (OBO->hasNoUnsignedWrap()) {
-                  const_cast<SCEVAddRecExpr *>(PHISCEV)
-                    ->setHasNoUnsignedWrap(true);
-                  const_cast<SCEVAddRecExpr *>(PostInc)
-                    ->setHasNoUnsignedWrap(true);
-                }
-                if (OBO->hasNoSignedWrap()) {
-                  const_cast<SCEVAddRecExpr *>(PHISCEV)
-                    ->setHasNoSignedWrap(true);
-                  const_cast<SCEVAddRecExpr *>(PostInc)
-                    ->setHasNoSignedWrap(true);
-                }
-              }
+            const SCEV *PHISCEV =
+              getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
+
+            // Since the no-wrap flags are on the increment, they apply to the
+            // post-incremented value as well.
+            if (Accum->isLoopInvariant(L))
+              (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+                                  Accum, L, HasNUW, HasNSW);
 
             // Okay, for the entire analysis of this edge we assumed the PHI
             // to be symbolic. We now need to go back and purge all of the
@@ -2781,26 +2851,29 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
     const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    if (!Trip) return FullSet;
+    ConstantRange ConservativeResult = FullSet;
+
+    // If there's no unsigned wrap, the value will never be less than its
+    // initial value.
+    if (AddRec->hasNoUnsignedWrap())
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+        ConservativeResult =
+          ConstantRange(C->getValue()->getValue(),
+                        APInt(getTypeSizeInBits(C->getType()), 0));
 
     // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
+    if (Trip && AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
         const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
         const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
         // Check for overflow.
-        // TODO: This is very conservative.
-        if (!(Step->isOne() &&
-              isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) &&
-            !(Step->isAllOnesValue() &&
-              isKnownPredicate(ICmpInst::ICMP_UGT, Start, End)))
-          return FullSet;
+        if (!AddRec->hasNoUnsignedWrap())
+          return ConservativeResult;
 
         ConstantRange StartRange = getUnsignedRange(Start);
         ConstantRange EndRange = getUnsignedRange(End);
@@ -2809,10 +2882,12 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
         APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
                                    EndRange.getUnsignedMax());
         if (Min.isMinValue() && Max.isMaxValue())
-          return FullSet;
+          return ConservativeResult;
         return ConstantRange(Min, Max+1);
       }
     }
+
+    return ConservativeResult;
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -2891,26 +2966,39 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
     const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    if (!Trip) return FullSet;
+    ConstantRange ConservativeResult = FullSet;
+
+    // If there's no signed wrap, and all the operands have the same sign or
+    // zero, the value won't ever change sign.
+    if (AddRec->hasNoSignedWrap()) {
+      bool AllNonNeg = true;
+      bool AllNonPos = true;
+      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+      }
+      unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+      if (AllNonNeg)
+        ConservativeResult = ConstantRange(APInt(BitWidth, 0),
+                                           APInt::getSignedMinValue(BitWidth));
+      else if (AllNonPos)
+        ConservativeResult = ConstantRange(APInt::getSignedMinValue(BitWidth),
+                                           APInt(BitWidth, 1));
+    }
 
     // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
+    if (Trip && AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
         const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
         const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
         // Check for overflow.
-        // TODO: This is very conservative.
-        if (!(Step->isOne() &&
-              isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
-            !(Step->isAllOnesValue() &&
-              isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
-          return FullSet;
+        if (!AddRec->hasNoSignedWrap())
+          return ConservativeResult;
 
         ConstantRange StartRange = getSignedRange(Start);
         ConstantRange EndRange = getSignedRange(End);
@@ -2919,15 +3007,19 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
         APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                    EndRange.getSignedMax());
         if (Min.isMinSignedValue() && Max.isMaxSignedValue())
-          return FullSet;
+          return ConservativeResult;
         return ConstantRange(Min, Max+1);
       }
     }
+
+    return ConservativeResult;
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     unsigned BitWidth = getTypeSizeInBits(U->getType());
+    if (!U->getValue()->getType()->isInteger() && !TD)
+      return FullSet;
     unsigned NS = ComputeNumSignBits(U->getValue(), TD);
     if (NS == 1)
       return FullSet;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2c66f1ac2c..b049f424fa 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -81,7 +81,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
     Instruction *I = CastInst::Create(Op, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
-    InsertedValues.insert(I);
+    rememberInstruction(I);
     return I;
   }
@@ -114,7 +114,7 @@
     IP = II->getNormalDest()->begin();
   while (isa<PHINode>(IP)) ++IP;
   Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP);
-  InsertedValues.insert(CI);
+  rememberInstruction(CI);
   return CI;
 }
@@ -144,7 +144,7 @@
 
   // If we haven't found this binop, insert it.
   Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
-  InsertedValues.insert(BO);
+  rememberInstruction(BO);
   return BO;
 }
@@ -491,22 +491,39 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     // Emit a GEP.
     Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
-    InsertedValues.insert(GEP);
+    rememberInstruction(GEP);
     return GEP;
   }
 
   // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
  // because ScalarEvolution may have changed the address arithmetic to
  // compute a value which is beyond the end of the allocated object.
-  Value *GEP = Builder.CreateGEP(V,
+  Value *Casted = V;
+  if (V->getType() != PTy)
+    Casted = InsertNoopCastOfTo(Casted, PTy);
+  Value *GEP = Builder.CreateGEP(Casted,
                                  GepIndices.begin(),
                                  GepIndices.end(),
                                  "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
-  InsertedValues.insert(GEP);
+  rememberInstruction(GEP);
   return expand(SE.getAddExpr(Ops));
 }
 
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+  if (!Mul) return false;
+
+  // If there is a constant factor, it will be first.
+  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+  if (!SC) return false;
+
+  // Return true if the value is negative, this matches things like (-42 * V).
+  return SC->getValue()->getValue().isNegative();
+}
+
 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   int NumOperands = S->getNumOperands();
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -539,8 +556,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   // Emit a bunch of add instructions
   for (int i = NumOperands-1; i >= 0; --i) {
     if (i == PIdx) continue;
-    Value *W = expandCodeFor(S->getOperand(i), Ty);
-    V = InsertBinop(Instruction::Add, V, W);
+    const SCEV *Op = S->getOperand(i);
+    if (isNonConstantNegative(Op)) {
+      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+      V = InsertBinop(Instruction::Sub, V, W);
+    } else {
+      Value *W = expandCodeFor(Op, Ty);
+      V = InsertBinop(Instruction::Add, V, W);
+    }
   }
   return V;
 }
@@ -603,7 +626,175 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
   }
 }
 
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+                                        const Loop *L,
+                                        const Type *ExpandTy,
+                                        const Type *IntTy) {
+  // Reuse a previously-inserted PHI, if present.
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized)
+      return PN;
+
+  // Save the original insertion point so we can restore it when we're done.
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+  // Expand code for the start value.
+  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+                                L->getHeader()->begin());
+
+  // Expand code for the step value. Insert instructions right before the
+  // terminator corresponding to the back-edge. Do this before creating the PHI
+  // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+  // negative, insert a sub instead of an add for the increment (unless it's a
+  // constant, because subtracts of constants are canonicalized to adds).
+  const SCEV *Step = Normalized->getStepRecurrence(SE);
+  bool isPointer = isa<PointerType>(ExpandTy);
+  bool isNegative = !isPointer && isNonConstantNegative(Step);
+  if (isNegative)
+    Step = SE.getNegativeSCEV(Step);
+  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+  // Create the PHI.
+  Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin());
+  PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv");
+  rememberInstruction(PN);
+
+  // Create the step instructions and populate the PHI.
+  BasicBlock *Header = L->getHeader();
+  for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
+       HPI != HPE; ++HPI) {
+    BasicBlock *Pred = *HPI;
+
+    // Add a start value.
+    if (!L->contains(Pred)) {
+      PN->addIncoming(StartV, Pred);
+      continue;
+    }
+
+    // Create a step value and add it to the PHI. If IVIncInsertLoop is
+    // non-null and equal to the addrec's loop, insert the instructions
+    // at IVIncInsertPos.
+    Instruction *InsertPos = L == IVIncInsertLoop ?
+      IVIncInsertPos : Pred->getTerminator();
+    Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+    Value *IncV;
+    // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+    if (isPointer) {
+      const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+      // If the step isn't constant, don't use an implicitly scaled GEP, because
+      // that would require a multiply inside the loop.
+      if (!isa<ConstantInt>(StepV))
+        GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+                                    GEPPtrTy->getAddressSpace());
+      const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+      IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+      if (IncV->getType() != PN->
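For orientation only, and not part of the patch above: a minimal sketch of how a client pass might drive the non-canonical expansion mode introduced by the new SCEVExpander members. The wrapper function, its parameters, and the choice of insertion point are hypothetical; only the SCEVExpander calls (disableCanonicalMode, setIVIncInsertPos, setPostInc, expandCodeFor) come from the header change in this diff.

// Hypothetical illustration, not part of this commit.
#include "llvm/Analysis/ScalarEvolutionExpander.h"

using namespace llvm;

// Expand Expr immediately before UserInst using the literal (non-canonical)
// addrec form, placing IV increments for L at IVIncPos and asking for the
// post-incremented value of addrecs in L.
static Value *ExpandLiterally(ScalarEvolution &SE, const SCEV *Expr,
                              const Type *Ty, Instruction *UserInst,
                              const Loop *L, Instruction *IVIncPos) {
  SCEVExpander Rewriter(SE);               // constructed in canonical mode
  Rewriter.disableCanonicalMode();         // expand addrecs literally
  Rewriter.setIVIncInsertPos(L, IVIncPos); // where IV increments for L go
  Rewriter.setPostInc(L);                  // use post-inc values for L
  return Rewriter.expandCodeFor(Expr, Ty, UserInst);
}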