32 files changed, 3030 insertions, 2614 deletions
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index bbdd0437a1..5bec8e445a 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -26,19 +26,44 @@ namespace llvm {
   /// Clients should create an instance of this class when rewriting is needed,
   /// and destroy it when finished to allow the release of the associated
   /// memory.
-  struct SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
+  class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
     ScalarEvolution &SE;
     std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
       InsertedExpressions;
     std::set<Value*> InsertedValues;
 
+    /// PostIncLoop - When non-null, expanded addrecs referring to the given
+    /// loop are expanded in post-inc mode. For example, expanding {1,+,1}<L> in
+    /// post-inc mode returns the add instruction that adds one to the phi
+    /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only
+    /// supported in non-canonical mode.
+    const Loop *PostIncLoop;
+
+    /// IVIncInsertLoop - When this is non-null, addrecs expanded in the
+    /// loop it indicates should be inserted with increments at
+    /// IVIncInsertPos.
+    const Loop *IVIncInsertLoop;
+
+    /// IVIncInsertPos - When expanding addrecs in the IVIncInsertLoop loop,
+    /// insert the IV increment at this position.
+    Instruction *IVIncInsertPos;
+
+    /// CanonicalMode - When true, expressions are expanded in "canonical"
+    /// form. In particular, addrecs are expanded as arithmetic based on
+    /// a canonical induction variable. When false, expressions are expanded
+    /// in a more literal form.
+    bool CanonicalMode;
+
+  protected:
     typedef IRBuilder<true, TargetFolder> BuilderType;
     BuilderType Builder;
 
     friend struct SCEVVisitor<SCEVExpander, Value*>;
 
   public:
+    /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
     explicit SCEVExpander(ScalarEvolution &se)
-      : SE(se), Builder(se.getContext(), TargetFolder(se.TD)) {}
+      : SE(se), PostIncLoop(0), IVIncInsertLoop(0), CanonicalMode(true),
+        Builder(se.getContext(), TargetFolder(se.TD)) {}
 
     /// clear - Erase the contents of the InsertedExpressions map so that users
     /// trying to expand the same expression into multiple BasicBlocks or
@@ -54,11 +79,36 @@ namespace llvm {
     /// expandCodeFor - Insert code to directly compute the specified SCEV
     /// expression into the program. The inserted code is inserted into the
     /// specified block.
-    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *IP) {
+    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I) {
+      BasicBlock::iterator IP = I;
+      while (isInsertedInstruction(IP)) ++IP;
       Builder.SetInsertPoint(IP->getParent(), IP);
       return expandCodeFor(SH, Ty);
     }
 
+    /// setIVIncInsertPos - Set the current IV increment loop and position.
+    void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
+      assert(!CanonicalMode &&
+             "IV increment positions are not supported in CanonicalMode");
+      IVIncInsertLoop = L;
+      IVIncInsertPos = Pos;
+    }
+
+    /// setPostInc - If L is non-null, enable post-inc expansion for addrecs
+    /// referring to the given loop. If L is null, disable post-inc expansion
+    /// completely. Post-inc expansion is only supported in non-canonical
+    /// mode.
+    void setPostInc(const Loop *L) {
+      assert(!CanonicalMode &&
+             "Post-inc expansion is not supported in CanonicalMode");
+      PostIncLoop = L;
+    }
+
+    /// disableCanonicalMode - Disable the behavior of expanding expressions in
+    /// canonical form rather than in a more literal form. Non-canonical mode
+    /// is useful for late optimization passes.
+    void disableCanonicalMode() { CanonicalMode = false; }
+
   private:
     LLVMContext &getContext() const { return SE.getContext(); }
 
@@ -121,6 +171,16 @@ namespace llvm {
     Value *visitUnknown(const SCEVUnknown *S) {
       return S->getValue();
     }
+
+    void rememberInstruction(Value *I) {
+      if (!PostIncLoop) InsertedValues.insert(I);
+    }
+
+    Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
+    PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+                                       const Loop *L,
+                                       const Type *ExpandTy,
+                                       const Type *IntTy);
   };
 }
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 92f00273a2..38611ccb62 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -324,12 +324,6 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &U) const {
   // the actual replacement value.
   if (U.isUseOfPostIncrementedValue())
     RetVal = SE->getAddExpr(RetVal, U.getParent()->Stride);
-  // Evaluate the expression out of the loop, if possible.
-  if (!L->contains(U.getUser())) {
-    const SCEV *ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
-    if (ExitVal->isLoopInvariant(L))
-      RetVal = ExitVal;
-  }
   return RetVal;
 }
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 7389007bf2..2f44913abd 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -1089,6 +1089,15 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
   if (!isa<SCEVSignExtendExpr>(SExt))
     return SExt;
 
+  // Force the cast to be folded into the operands of an addrec.
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
+    SmallVector<const SCEV *, 4> Ops;
+    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+         I != E; ++I)
+      Ops.push_back(getAnyExtendExpr(*I, Ty));
+    return getAddRecExpr(Ops, AR->getLoop());
+  }
+
   // If the expression is obviously signed, use the sext cast value.
   if (isa<SCEVSMaxExpr>(Op))
     return SExt;
@@ -1204,6 +1213,17 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVAddExpr operand types don't match!");
 #endif
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (!isKnownNonNegative(Ops[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, LI);
 
@@ -1521,10 +1541,13 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVAddExpr *S = SCEVAllocator.Allocate<SCEVAddExpr>();
-  new (S) SCEVAddExpr(ID, Ops);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVAddExpr *S =
+    static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVAddExpr>();
+    new (S) SCEVAddExpr(ID, Ops);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -1535,6 +1558,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
 const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         bool HasNUW, bool HasNSW) {
   assert(!Ops.empty() && "Cannot get empty mul!");
+  if (Ops.size() == 1) return Ops[0];
 #ifndef NDEBUG
   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
     assert(getEffectiveSCEVType(Ops[i]->getType()) ==
@@ -1542,6 +1566,17 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
            "SCEVMulExpr operand types don't match!");
 #endif
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+      if (!isKnownNonNegative(Ops[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Sort by complexity, this groups all similar expression types together.
   GroupByComplexity(Ops, LI);
 
@@ -1576,6 +1611,22 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
     } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) {
       // If we have a multiply of zero, it will always be zero.
       return Ops[0];
+    } else if (Ops[0]->isAllOnesValue()) {
+      // If we have a mul by -1 of an add, try distributing the -1 among the
+      // add operands.
+      if (Ops.size() == 2)
+        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {
+          SmallVector<const SCEV *, 4> NewOps;
+          bool AnyFolded = false;
+          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+               I != E; ++I) {
+            const SCEV *Mul = getMulExpr(Ops[0], *I);
+            if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;
+            NewOps.push_back(Mul);
+          }
+          if (AnyFolded)
+            return getAddExpr(NewOps);
+        }
     }
   }
 
@@ -1642,7 +1693,9 @@
 
       // It's tempting to propagate the NSW flag here, but nsw multiplication
       // is not associative so this isn't necessarily safe.
-      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop());
+      const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
+                                         HasNUW && AddRec->hasNoUnsignedWrap(),
+                                         /*HasNSW=*/false);
 
       // If all of the other operands were loop invariant, we are done.
       if (Ops.size() == 1) return NewRec;
@@ -1696,10 +1749,13 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     ID.AddPointer(Ops[i]);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVMulExpr *S = SCEVAllocator.Allocate<SCEVMulExpr>();
-  new (S) SCEVMulExpr(ID, Ops);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVMulExpr *S =
+    static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVMulExpr>();
+    new (S) SCEVMulExpr(ID, Ops);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -1842,10 +1898,24 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
       return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0}  -->  X
   }
 
+  // If HasNSW is true and all the operands are non-negative, infer HasNUW.
+  if (!HasNUW && HasNSW) {
+    bool All = true;
+    for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+      if (!isKnownNonNegative(Operands[i])) {
+        All = false;
+        break;
+      }
+    if (All) HasNUW = true;
+  }
+
   // Canonicalize nested AddRecs in by nesting them in order of loop depth.
   if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
     const Loop *NestedLoop = NestedAR->getLoop();
-    if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
+    if (L->contains(NestedLoop->getHeader()) ?
+        (L->getLoopDepth() < NestedLoop->getLoopDepth()) :
+        (!NestedLoop->contains(L->getHeader()) &&
+         DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
       SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
                                                   NestedAR->op_end());
       Operands[0] = NestedAR->getStart();
@@ -1884,10 +1954,13 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
     ID.AddPointer(Operands[i]);
   ID.AddPointer(L);
   void *IP = 0;
-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-  SCEVAddRecExpr *S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
-  new (S) SCEVAddRecExpr(ID, Operands, L);
-  UniqueSCEVs.InsertNode(S, IP);
+  SCEVAddRecExpr *S =
+    static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
+  if (!S) {
+    S = SCEVAllocator.Allocate<SCEVAddRecExpr>();
+    new (S) SCEVAddRecExpr(ID, Operands, L);
+    UniqueSCEVs.InsertNode(S, IP);
+  }
   if (HasNUW) S->setHasNoUnsignedWrap(true);
   if (HasNSW) S->setHasNoSignedWrap(true);
   return S;
@@ -2525,31 +2598,28 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
           if (Accum->isLoopInvariant(L) ||
               (isa<SCEVAddRecExpr>(Accum) &&
                cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
+            bool HasNUW = false;
+            bool HasNSW = false;
+
+            // If the increment doesn't overflow, then neither the addrec nor
+            // the post-increment will overflow.
+            if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) {
+              if (OBO->hasNoUnsignedWrap())
+                HasNUW = true;
+              if (OBO->hasNoSignedWrap())
+                HasNSW = true;
+            }
+
             const SCEV *StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
-            const SCEVAddRecExpr *PHISCEV =
-              cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
-
-            // If the increment doesn't overflow, then neither the addrec nor the
-            // post-increment will overflow.
-            if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
-              if (OBO->getOperand(0) == PN &&
-                  getSCEV(OBO->getOperand(1)) ==
-                    PHISCEV->getStepRecurrence(*this)) {
-                const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
-                if (OBO->hasNoUnsignedWrap()) {
-                  const_cast<SCEVAddRecExpr *>(PHISCEV)
-                    ->setHasNoUnsignedWrap(true);
-                  const_cast<SCEVAddRecExpr *>(PostInc)
-                    ->setHasNoUnsignedWrap(true);
-                }
-                if (OBO->hasNoSignedWrap()) {
-                  const_cast<SCEVAddRecExpr *>(PHISCEV)
-                    ->setHasNoSignedWrap(true);
-                  const_cast<SCEVAddRecExpr *>(PostInc)
-                    ->setHasNoSignedWrap(true);
-                }
-              }
+            const SCEV *PHISCEV =
+              getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW);
+
+            // Since the no-wrap flags are on the increment, they apply to the
+            // post-incremented value as well.
+            if (Accum->isLoopInvariant(L))
+              (void)getAddRecExpr(getAddExpr(StartVal, Accum),
+                                  Accum, L, HasNUW, HasNSW);
 
             // Okay, for the entire analysis of this edge we assumed the PHI
             // to be symbolic. We now need to go back and purge all of the
@@ -2781,26 +2851,29 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
     const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    if (!Trip) return FullSet;
+    ConstantRange ConservativeResult = FullSet;
+
+    // If there's no unsigned wrap, the value will never be less than its
+    // initial value.
+    if (AddRec->hasNoUnsignedWrap())
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+        ConservativeResult =
+          ConstantRange(C->getValue()->getValue(),
+                        APInt(getTypeSizeInBits(C->getType()), 0));
 
     // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
+    if (Trip && AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
         const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
         const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
         // Check for overflow.
-        // TODO: This is very conservative.
-        if (!(Step->isOne() &&
-              isKnownPredicate(ICmpInst::ICMP_ULT, Start, End)) &&
-            !(Step->isAllOnesValue() &&
-              isKnownPredicate(ICmpInst::ICMP_UGT, Start, End)))
-          return FullSet;
+        if (!AddRec->hasNoUnsignedWrap())
+          return ConservativeResult;
 
         ConstantRange StartRange = getUnsignedRange(Start);
         ConstantRange EndRange = getUnsignedRange(End);
@@ -2809,10 +2882,12 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
         APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
                                    EndRange.getUnsignedMax());
         if (Min.isMinValue() && Max.isMaxValue())
-          return FullSet;
+          return ConservativeResult;
         return ConstantRange(Min, Max+1);
       }
     }
+
+    return ConservativeResult;
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
@@ -2891,26 +2966,39 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
     const SCEV *T = getBackedgeTakenCount(AddRec->getLoop());
     const SCEVConstant *Trip = dyn_cast<SCEVConstant>(T);
-    if (!Trip) return FullSet;
+    ConstantRange ConservativeResult = FullSet;
+
+    // If there's no signed wrap, and all the operands have the same sign or
+    // zero, the value won't ever change sign.
+    if (AddRec->hasNoSignedWrap()) {
+      bool AllNonNeg = true;
+      bool AllNonPos = true;
+      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+      }
+      unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
+      if (AllNonNeg)
+        ConservativeResult = ConstantRange(APInt(BitWidth, 0),
+                                           APInt::getSignedMinValue(BitWidth));
+      else if (AllNonPos)
+        ConservativeResult = ConstantRange(APInt::getSignedMinValue(BitWidth),
+                                           APInt(BitWidth, 1));
+    }
 
     // TODO: non-affine addrec
-    if (AddRec->isAffine()) {
+    if (Trip && AddRec->isAffine()) {
       const Type *Ty = AddRec->getType();
       const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
       if (getTypeSizeInBits(MaxBECount->getType()) <= getTypeSizeInBits(Ty)) {
         MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
 
         const SCEV *Start = AddRec->getStart();
-        const SCEV *Step = AddRec->getStepRecurrence(*this);
         const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
 
         // Check for overflow.
-        // TODO: This is very conservative.
-        if (!(Step->isOne() &&
-              isKnownPredicate(ICmpInst::ICMP_SLT, Start, End)) &&
-            !(Step->isAllOnesValue() &&
-              isKnownPredicate(ICmpInst::ICMP_SGT, Start, End)))
-          return FullSet;
+        if (!AddRec->hasNoSignedWrap())
+          return ConservativeResult;
 
         ConstantRange StartRange = getSignedRange(Start);
         ConstantRange EndRange = getSignedRange(End);
@@ -2919,15 +3007,19 @@ ScalarEvolution::getSignedRange(const SCEV *S) {
         APInt Max = APIntOps::smax(StartRange.getSignedMax(),
                                    EndRange.getSignedMax());
         if (Min.isMinSignedValue() && Max.isMaxSignedValue())
-          return FullSet;
+          return ConservativeResult;
         return ConstantRange(Min, Max+1);
       }
     }
+
+    return ConservativeResult;
   }
 
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     unsigned BitWidth = getTypeSizeInBits(U->getType());
+    if (!U->getValue()->getType()->isInteger() && !TD)
+      return FullSet;
     unsigned NS = ComputeNumSignBits(U->getValue(), TD);
     if (NS == 1)
       return FullSet;
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 2c66f1ac2c..b049f424fa 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -81,7 +81,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
     Instruction *I = CastInst::Create(Op, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
-    InsertedValues.insert(I);
+    rememberInstruction(I);
     return I;
   }
@@ -114,7 +114,7 @@
     IP = II->getNormalDest()->begin();
   while (isa<PHINode>(IP)) ++IP;
   Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP);
-  InsertedValues.insert(CI);
+  rememberInstruction(CI);
   return CI;
 }
@@ -144,7 +144,7 @@
 
   // If we haven't found this binop, insert it.
   Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
-  InsertedValues.insert(BO);
+  rememberInstruction(BO);
   return BO;
 }
@@ -491,22 +491,39 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     // Emit a GEP.
     Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
-    InsertedValues.insert(GEP);
+    rememberInstruction(GEP);
     return GEP;
   }
 
   // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
  // because ScalarEvolution may have changed the address arithmetic to
  // compute a value which is beyond the end of the allocated object.
-  Value *GEP = Builder.CreateGEP(V,
+  Value *Casted = V;
+  if (V->getType() != PTy)
+    Casted = InsertNoopCastOfTo(Casted, PTy);
+  Value *GEP = Builder.CreateGEP(Casted,
                                  GepIndices.begin(),
                                  GepIndices.end(),
                                  "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
-  InsertedValues.insert(GEP);
+  rememberInstruction(GEP);
   return expand(SE.getAddExpr(Ops));
 }
 
+/// isNonConstantNegative - Return true if the specified scev is negated, but
+/// not a constant.
+static bool isNonConstantNegative(const SCEV *F) {
+  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
+  if (!Mul) return false;
+
+  // If there is a constant factor, it will be first.
+  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
+  if (!SC) return false;
+
+  // Return true if the value is negative, this matches things like (-42 * V).
+  return SC->getValue()->getValue().isNegative();
+}
+
 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   int NumOperands = S->getNumOperands();
   const Type *Ty = SE.getEffectiveSCEVType(S->getType());
@@ -539,8 +556,14 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
   // Emit a bunch of add instructions
   for (int i = NumOperands-1; i >= 0; --i) {
     if (i == PIdx) continue;
-    Value *W = expandCodeFor(S->getOperand(i), Ty);
-    V = InsertBinop(Instruction::Add, V, W);
+    const SCEV *Op = S->getOperand(i);
+    if (isNonConstantNegative(Op)) {
+      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
+      V = InsertBinop(Instruction::Sub, V, W);
+    } else {
+      Value *W = expandCodeFor(Op, Ty);
+      V = InsertBinop(Instruction::Add, V, W);
+    }
   }
   return V;
 }
@@ -603,7 +626,175 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
   }
 }
 
+/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
+/// the base addrec, which is the addrec without any non-loop-dominating
+/// values, and return the PHI.
+PHINode *
+SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
+                                        const Loop *L,
+                                        const Type *ExpandTy,
+                                        const Type *IntTy) {
+  // Reuse a previously-inserted PHI, if present.
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    if (isInsertedInstruction(PN) && SE.getSCEV(PN) == Normalized)
+      return PN;
+
+  // Save the original insertion point so we can restore it when we're done.
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+  // Expand code for the start value.
+  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+                                L->getHeader()->begin());
+
+  // Expand code for the step value. Insert instructions right before the
+  // terminator corresponding to the back-edge. Do this before creating the PHI
+  // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
+  // negative, insert a sub instead of an add for the increment (unless it's a
+  // constant, because subtracts of constants are canonicalized to adds).
+  const SCEV *Step = Normalized->getStepRecurrence(SE);
+  bool isPointer = isa<PointerType>(ExpandTy);
+  bool isNegative = !isPointer && isNonConstantNegative(Step);
+  if (isNegative)
+    Step = SE.getNegativeSCEV(Step);
+  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
+
+  // Create the PHI.
+  Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin());
+  PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv");
+  rememberInstruction(PN);
+
+  // Create the step instructions and populate the PHI.
+  BasicBlock *Header = L->getHeader();
+  for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
+       HPI != HPE; ++HPI) {
+    BasicBlock *Pred = *HPI;
+
+    // Add a start value.
+    if (!L->contains(Pred)) {
+      PN->addIncoming(StartV, Pred);
+      continue;
+    }
+
+    // Create a step value and add it to the PHI. If IVIncInsertLoop is
+    // non-null and equal to the addrec's loop, insert the instructions
+    // at IVIncInsertPos.
+    Instruction *InsertPos = L == IVIncInsertLoop ?
+      IVIncInsertPos : Pred->getTerminator();
+    Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+    Value *IncV;
+    // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
+    if (isPointer) {
+      const PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+      // If the step isn't constant, don't use an implicitly scaled GEP, because
+      // that would require a multiply inside the loop.
+      if (!isa<ConstantInt>(StepV))
+        GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+                                    GEPPtrTy->getAddressSpace());
+      const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
+      IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
+      if (IncV->getType() != PN->
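For orientation only, and not part of the patch above: a minimal sketch of how a client pass might drive the non-canonical expansion mode introduced by the new SCEVExpander members. The wrapper function, its parameters, and the choice of insertion point are hypothetical; only the SCEVExpander calls (disableCanonicalMode, setIVIncInsertPos, setPostInc, expandCodeFor) come from the header change in this diff.

// Hypothetical illustration, not part of this commit.
#include "llvm/Analysis/ScalarEvolutionExpander.h"

using namespace llvm;

// Expand Expr immediately before UserInst using the literal (non-canonical)
// addrec form, placing IV increments for L at IVIncPos and asking for the
// post-incremented value of addrecs in L.
static Value *ExpandLiterally(ScalarEvolution &SE, const SCEV *Expr,
                              const Type *Ty, Instruction *UserInst,
                              const Loop *L, Instruction *IVIncPos) {
  SCEVExpander Rewriter(SE);               // constructed in canonical mode
  Rewriter.disableCanonicalMode();         // expand addrecs literally
  Rewriter.setIVIncInsertPos(L, IVIncPos); // where IV increments for L go
  Rewriter.setPostInc(L);                  // use post-inc values for L
  return Rewriter.expandCodeFor(Expr, Ty, UserInst);
}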