aboutsummaryrefslogtreecommitdiff
path: root/lib/Transforms/Scalar/LoopStrengthReduce.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Transforms/Scalar/LoopStrengthReduce.cpp')
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp118
1 files changed, 63 insertions, 55 deletions
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 625a75d6cc..631092b326 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -781,10 +781,10 @@ struct LSRFixup {
/// will be replaced.
Value *OperandValToReplace;
- /// PostIncLoop - If this user is to use the post-incremented value of an
+ /// PostIncLoops - If this user is to use the post-incremented value of an
/// induction variable, this variable is non-null and holds the loop
/// associated with the induction variable.
- const Loop *PostIncLoop;
+ PostIncLoopSet PostIncLoops;
/// LUIdx - The index of the LSRUse describing the expression which
/// this fixup needs, minus an offset (below).
@@ -795,6 +795,8 @@ struct LSRFixup {
/// offsets, for example in an unrolled loop.
int64_t Offset;
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
LSRFixup();
void print(raw_ostream &OS) const;
@@ -804,9 +806,24 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), PostIncLoop(0),
+ : UserInst(0), OperandValToReplace(0),
LUIdx(~size_t(0)), Offset(0) {}
+/// isUseFullyOutsideLoop - Test whether this fixup always uses its
+/// value outside of the given loop.
+bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
+ // PHI nodes use their value in their incoming blocks.
+ if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == OperandValToReplace &&
+ L->contains(PN->getIncomingBlock(i)))
+ return false;
+ return true;
+ }
+
+ return !L->contains(UserInst);
+}
+
void LSRFixup::print(raw_ostream &OS) const {
OS << "UserInst=";
// Store is common and interesting enough to be worth special-casing.
@@ -821,9 +838,10 @@ void LSRFixup::print(raw_ostream &OS) const {
OS << ", OperandValToReplace=";
WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false);
- if (PostIncLoop) {
+ for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(),
+ E = PostIncLoops.end(); I != E; ++I) {
OS << ", PostIncLoop=";
- WriteAsOperand(OS, PostIncLoop->getHeader(), /*PrintType=*/false);
+ WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false);
}
if (LUIdx != ~size_t(0))
@@ -1545,8 +1563,9 @@ LSRInstance::OptimizeLoopTermCond() {
!DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
// Conservatively assume there may be reuse if the quotient of their
// strides could be a legal scale.
- const SCEV *A = CondUse->getStride();
- const SCEV *B = UI->getStride();
+ const SCEV *A = CondUse->getStride(L);
+ const SCEV *B = UI->getStride(L);
+ if (!A || !B) continue;
if (SE.getTypeSizeInBits(A->getType()) !=
SE.getTypeSizeInBits(B->getType())) {
if (SE.getTypeSizeInBits(A->getType()) >
@@ -1598,7 +1617,7 @@ LSRInstance::OptimizeLoopTermCond() {
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
- CondUse = &IU.AddUser(CondUse->getStride(), CondUse->getOffset(),
+ CondUse = &IU.AddUser(CondUse->getExpr(),
Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
@@ -1607,9 +1626,7 @@ LSRInstance::OptimizeLoopTermCond() {
// If we get to here, we know that we can transform the setcc instruction to
// use the post-incremented version of the IV, allowing us to coalesce the
// live ranges for the IV correctly.
- CondUse->setOffset(SE.getMinusSCEV(CondUse->getOffset(),
- CondUse->getStride()));
- CondUse->setIsUseOfPostIncrementedValue(true);
+ CondUse->transformToPostInc(L);
Changed = true;
PostIncs.insert(Cond);
@@ -1717,19 +1734,24 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
// Collect interesting types and strides.
+ SmallVector<const SCEV *, 4> Worklist;
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
- const SCEV *Stride = UI->getStride();
+ const SCEV *Expr = UI->getExpr();
// Collect interesting types.
- Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
-
- // Add the stride for this loop.
- Strides.insert(Stride);
-
- // Add strides for other mentioned loops.
- for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
- AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
- Strides.insert(AR->getStepRecurrence(SE));
+ Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
+
+ // Add strides for mentioned loops.
+ Worklist.push_back(Expr);
+ do {
+ const SCEV *S = Worklist.pop_back_val();
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ Strides.insert(AR->getStepRecurrence(SE));
+ Worklist.push_back(AR->getStart());
+ } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+ }
+ } while (!Worklist.empty());
}
// Compute interesting factors from the set of interesting strides.
@@ -1776,8 +1798,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = UI->getUser();
LF.OperandValToReplace = UI->getOperandValToReplace();
- if (UI->isUseOfPostIncrementedValue())
- LF.PostIncLoop = L;
+ LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
const Type *AccessTy = 0;
@@ -1786,7 +1807,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
AccessTy = getAccessType(LF.UserInst);
}
- const SCEV *S = IU.getCanonicalExpr(*UI);
+ const SCEV *S = UI->getExpr();
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
@@ -1824,7 +1845,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
- LU.AllFixupsOutsideLoop &= !L->contains(LF.UserInst);
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
@@ -1936,7 +1957,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
- LU.AllFixupsOutsideLoop &= L->contains(LF.UserInst);
+ LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
@@ -2783,8 +2804,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
- // Then, collect some instructions which we will remain dominated by when
- // expanding the replacement. These must be dominated by any operands that
+ // Then, collect some instructions which must be dominated by the
+ // expanding replacement. These must be dominated by any operands that
// will be required in the expansion.
SmallVector<Instruction *, 4> Inputs;
if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
@@ -2793,8 +2814,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (Instruction *I =
dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
Inputs.push_back(I);
- if (LF.PostIncLoop) {
- if (!L->contains(LF.UserInst))
+ if (LF.PostIncLoops.count(L)) {
+ if (LF.isUseFullyOutsideLoop(L))
Inputs.push_back(L->getLoopLatch()->getTerminator());
else
Inputs.push_back(IVIncInsertPos);
@@ -2831,7 +2852,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
- Rewriter.setPostInc(LF.PostIncLoop);
+ Rewriter.setPostInc(LF.PostIncLoops);
// This is the type that the user actually needs.
const Type *OpTy = LF.OperandValToReplace->getType();
@@ -2855,24 +2876,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
const SCEV *Reg = *I;
assert(!Reg->isZero() && "Zero allocated in a base register!");
- // If we're expanding for a post-inc user for the add-rec's loop, make the
- // post-inc adjustment.
- const SCEV *Start = Reg;
- while (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Start)) {
- if (AR->getLoop() == LF.PostIncLoop) {
- Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
- // If the user is inside the loop, insert the code after the increment
- // so that it is dominated by its operand. If the original insert point
- // was already dominated by the increment, keep it, because there may
- // be loop-variant operands that need to be respected also.
- if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
- IP = IVIncInsertPos;
- while (isa<DbgInfoIntrinsic>(IP)) ++IP;
- }
- break;
- }
- Start = AR->getStart();
- }
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ Reg = TransformForPostIncUse(Denormalize, Reg,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
@@ -2889,11 +2897,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (F.AM.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
- // If we're expanding for a post-inc user for the add-rec's loop, make the
- // post-inc adjustment.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ScaledS))
- if (AR->getLoop() == LF.PostIncLoop)
- ScaledS = SE.getAddExpr(ScaledS, AR->getStepRecurrence(SE));
+ // If we're expanding for a post-inc user, make the post-inc adjustment.
+ PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
+ ScaledS = TransformForPostIncUse(Denormalize, ScaledS,
+ LF.UserInst, LF.OperandValToReplace,
+ Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
// An interesting way of "folding" with an icmp is to use a negated
@@ -2954,7 +2962,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP);
// We're done expanding now, so reset the rewriter.
- Rewriter.setPostInc(0);
+ Rewriter.clearPostInc();
// An ICmpZero Formula represents an ICmp which we're handling as a
// comparison against zero. Now that we've expanded an expression for that