diff options
author | Andrew Trick <atrick@apple.com> | 2011-05-25 04:42:22 +0000 |
---|---|---|
committer | Andrew Trick <atrick@apple.com> | 2011-05-25 04:42:22 +0000 |
commit | 03d3d3b361800f28c75d3386978d22e6d57744b7 (patch) | |
tree | 1aee4d19a8b79ab0bd0cd5802b51b85a1b04e4cb | |
parent | 47268164f3d660f6357cc3a59d510efe3bc9152f (diff) |
indvars: fixed IV cloning in -disable-iv-rewrite mode with associated
cleanup and overdue test cases.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@132038 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 166 | ||||
-rw-r--r-- | test/Transforms/IndVarSimplify/elim-extend.ll | 93 | ||||
-rw-r--r-- | test/Transforms/IndVarSimplify/no-iv-rewrite.ll | 123 |
3 files changed, 330 insertions, 52 deletions
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index bc3bc303ae..e45ef3a808 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -67,6 +67,9 @@ STATISTIC(NumWidened , "Number of indvars widened"); STATISTIC(NumInserted, "Number of canonical indvars added"); STATISTIC(NumReplaced, "Number of exit values replaced"); STATISTIC(NumLFTR , "Number of loop exit tests replaced"); +STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated"); +STATISTIC(NumElimRem , "Number of IV remainder operations eliminated"); +STATISTIC(NumElimCmp , "Number of IV comparisons eliminated"); // DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis // and referenced here. @@ -117,9 +120,6 @@ namespace { PHINode *IVPhi); void RewriteNonIntegerIVs(Loop *L); - bool canExpandBackedgeTakenCount(Loop *L, - const SCEV *BackedgeTakenCount); - ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter); @@ -200,9 +200,8 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction /// sequence that can be used by LinearFunctionTestReplace. -bool IndVarSimplify:: -canExpandBackedgeTakenCount(Loop *L, - const SCEV *BackedgeTakenCount) { +static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) || BackedgeTakenCount->isZero()) return false; @@ -235,6 +234,36 @@ canExpandBackedgeTakenCount(Loop *L, return true; } +/// getBackedgeIVType - Get the widest type used by the loop test after peeking +/// through Truncs. +/// +/// TODO: Unnecessary once LinearFunctionTestReplace is removed. +static const Type *getBackedgeIVType(Loop *L) { + if (!L->getExitingBlock()) + return 0; + + // Can't rewrite non-branch yet. + BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator()); + if (!BI) + return 0; + + ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition()); + if (!Cond) + return 0; + + const Type *Ty = 0; + for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end(); + OI != OE; ++OI) { + assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types"); + TruncInst *Trunc = dyn_cast<TruncInst>(*OI); + if (!Trunc) + continue; + + return Trunc->getSrcTy(); + } + return Ty; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. This pass is able to rewrite the exit tests of any loop where the @@ -245,7 +274,7 @@ LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { - assert(canExpandBackedgeTakenCount(L, BackedgeTakenCount) && "precondition"); + assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator()); // If the exiting block is not the same as the backedge block, we must compare @@ -536,12 +565,12 @@ public: bool CreateWideIV(SCEVExpander &Rewriter); protected: - const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); - Instruction *CloneIVUser(Instruction *NarrowUse, Instruction *NarrowDef, Instruction *WideDef); + const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + Instruction *WidenIVUse(Instruction *NarrowUse, Instruction *NarrowDef, Instruction *WideDef); @@ -594,24 +623,10 @@ void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) { } } -// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' -// perspective after widening it's type? In other words, can the extend be -// safely hoisted out of the loop with SCEV reducing the value to a recurrence -// on the same loop. If so, return the sign or zero extended -// recurrence. Otherwise return NULL. -const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { - if (!SE->isSCEVable(NarrowUse->getType())) - return 0; - - const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); - const SCEV *WideExpr = IsSigned ? - SE->getSignExtendExpr(NarrowExpr, WideType) : - SE->getZeroExtendExpr(NarrowExpr, WideType); - const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); - if (!AddRec || AddRec->getLoop() != L) - return 0; - - return AddRec; +static Value *getExtend( Value *NarrowOper, const Type *WideType, + bool IsSigned, IRBuilder<> &Builder) { + return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) : + Builder.CreateZExt(NarrowOper, WideType); } /// CloneIVUser - Instantiate a wide operation to replace a narrow @@ -636,36 +651,51 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse, case Instruction::AShr: DEBUG(dbgs() << "Cloning IVUser: " << *NarrowUse << "\n"); + IRBuilder<> Builder(NarrowUse); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know + // anything about the narrow operand yet so must insert a [sz]ext. It is + // probably loop invariant and will be folded or hoisted. If it actually + // comes from a widened IV, it should be removed during a future call to + // WidenIVUse. + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) ? WideDef : + getExtend(NarrowUse->getOperand(0), WideType, IsSigned, Builder); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) ? WideDef : + getExtend(NarrowUse->getOperand(1), WideType, IsSigned, Builder); + BinaryOperator *NarrowBO = cast<BinaryOperator>(NarrowUse); BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), - NarrowBO->getOperand(0), - NarrowBO->getOperand(1), + LHS, RHS, NarrowBO->getName()); - IRBuilder<> Builder(NarrowUse); Builder.Insert(WideBO); if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); - for (unsigned i = 0; i < NarrowBO->getNumOperands(); ++i) { - Value *NarrowOper = NarrowBO->getOperand(i); - if (NarrowOper == NarrowDef) { - WideBO->setOperand(i, WideDef); - continue; - } - // We don't know anything about the other operand here so must insert a - // [sz]ext. It is probably loop invariant and will be folded or - // hoisted. If it actually comes from a widened IV, it should be removed - // during a future call to WidenIVUse. - IRBuilder<> Builder(NarrowUse); - Value *Extend = IsSigned ? - Builder.CreateSExt(NarrowOper, WideType) : - Builder.CreateZExt(NarrowOper, WideType); - WideBO->setOperand(i, Extend); - } + return WideBO; } llvm_unreachable(0); } +// GetWideRecurrence - Is this instruction potentially interesting from IVUsers' +// perspective after widening it's type? In other words, can the extend be +// safely hoisted out of the loop with SCEV reducing the value to a recurrence +// on the same loop. If so, return the sign or zero extended +// recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { + if (!SE->isSCEVable(NarrowUse->getType())) + return 0; + + const SCEV *NarrowExpr = SE->getSCEV(NarrowUse); + const SCEV *WideExpr = IsSigned ? + SE->getSignExtendExpr(NarrowExpr, WideType) : + SE->getZeroExtendExpr(NarrowExpr, WideType); + const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr); + if (!AddRec || AddRec->getLoop() != L) + return 0; + + return AddRec; +} + /// WidenIVUse - Determine whether an individual user of the narrow IV can be /// widened. If so, return the wide clone of the user. Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, @@ -682,9 +712,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, // Our raison d'etre! Eliminate sign and zero extension. if (IsSigned ? isa<SExtInst>(NarrowUse) : isa<ZExtInst>(NarrowUse)) { - NarrowUse->replaceAllUsesWith(WideDef); - DeadInsts.push_back(NarrowUse); - + Value *NewDef = WideDef; + if (NarrowUse->getType() != WideType) { + unsigned CastWidth = SE->getTypeSizeInBits(NarrowUse->getType()); + unsigned IVWidth = SE->getTypeSizeInBits(WideType); + if (CastWidth < IVWidth) { + // The cast isn't as wide as the IV, so insert a Trunc. + IRBuilder<> Builder(NarrowUse); + NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType()); + } + else { + // A wider extend was hidden behind a narrower one. This may induce + // another round of IV widening in which the intermediate IV becomes + // dead. It should be very rare. + DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi + << " not wide enough to subsume " << *NarrowUse << "\n"); + NarrowUse->replaceUsesOfWith(NarrowDef, WideDef); + NewDef = NarrowUse; + } + } + if (NewDef != NarrowUse) { + DEBUG(dbgs() << "INDVARS: eliminating " << *NarrowUse + << " replaced by " << *WideDef << "\n"); + ++NumElimExt; + NarrowUse->replaceAllUsesWith(NewDef); + DeadInsts.push_back(NarrowUse); + } // Now that the extend is gone, expose it's uses to IVUsers for potential // further simplification within SimplifyIVUsers. IU->AddUsersIfInteresting(WideDef, WidePhi); @@ -698,7 +751,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse, // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. IRBuilder<> Builder(NarrowUse); - Value *Trunc = Builder.CreateTrunc(WideDef, NarrowUse->getType()); + Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType()); NarrowUse->replaceUsesOfWith(NarrowDef, Trunc); return 0; } @@ -834,6 +887,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { return; DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + ++NumElimCmp; Changed = true; DeadInsts.push_back(ICmp); } @@ -888,6 +942,7 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem, IU->AddUsersIfInteresting(I, IVPhi); DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + ++NumElimRem; Changed = true; DeadInsts.push_back(Rem); } @@ -940,13 +995,20 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // a canonical induction variable should be inserted. const Type *LargestType = 0; bool NeedCannIV = false; - bool ExpandBECount = canExpandBackedgeTakenCount(L, BackedgeTakenCount); + bool ExpandBECount = canExpandBackedgeTakenCount(L, SE); if (ExpandBECount) { // If we have a known trip count and a single exit block, we'll be // rewriting the loop exit test condition below, which requires a // canonical induction variable. NeedCannIV = true; const Type *Ty = BackedgeTakenCount->getType(); + if (DisableIVRewrite) { + // In this mode, SimplifyIVUsers may have already widened the IV used by + // the backedge test and inserted a Trunc on the compare's operand. Get + // the wider type to avoid creating a redundant narrow IV only used by the + // loop test. + LargestType = getBackedgeIVType(L); + } if (!LargestType || SE->getTypeSizeInBits(Ty) > SE->getTypeSizeInBits(LargestType)) @@ -1002,7 +1064,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // using it. We can currently only handle loops with a single exit. ICmpInst *NewICmp = 0; if (ExpandBECount) { - assert(canExpandBackedgeTakenCount(L, BackedgeTakenCount) && + assert(canExpandBackedgeTakenCount(L, SE) && "canonical IV disrupted BackedgeTaken expansion"); assert(NeedCannIV && "LinearFunctionTestReplace requires a canonical induction variable"); diff --git a/test/Transforms/IndVarSimplify/elim-extend.ll b/test/Transforms/IndVarSimplify/elim-extend.ll new file mode 100644 index 0000000000..b494a19413 --- /dev/null +++ b/test/Transforms/IndVarSimplify/elim-extend.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Tests sign extend elimination in the inner and outer loop. +; %outercount is straightforward to widen, besides being in an outer loop. +; %innercount is currently blocked by lcssa, so is not widened. +; %inneriv can be widened only after proving it has no signed-overflow +; based on the loop test. +define void @nestedIV(i8* %address, i32 %limit) nounwind { +entry: + %limitdec = add i32 %limit, -1 + br label %outerloop + +; CHECK: outerloop: +; +; Eliminate %ofs1 after widening outercount. +; CHECK-NOT: sext +; CHECK: getelementptr +; +; IV rewriting hoists a gep into this block. We don't like that. +; CHECK-NOT: getelementptr +outerloop: + %outercount = phi i32 [ %outerpostcount, %outermerge ], [ 0, %entry ] + %innercount = phi i32 [ %innercount.merge, %outermerge ], [ 0, %entry ] + + %outercountdec = add i32 %outercount, -1 + %ofs1 = sext i32 %outercountdec to i64 + %adr1 = getelementptr i8* %address, i64 %ofs1 + store i8 0, i8* %adr1 + + br label %innerpreheader + +innerpreheader: + %innerprecmp = icmp sgt i32 %limitdec, %innercount + br i1 %innerprecmp, label %innerloop, label %outermerge + +; CHECK: innerloop: +; +; Eliminate %ofs2 after widening inneriv. +; CHECK-NOT: sext +; CHECK: getelementptr +; +; FIXME: We should not increase the number of IVs in this loop. +; sext elimination plus LFTR results in 3 final IVs. +; +; FIXME: eliminate %ofs3 based the loop pre/post conditions +; even though innerpostiv is not NSW, thus sign extending innerpostiv +; does not yield the same expression as incrementing the widened inneriv. +innerloop: + %inneriv = phi i32 [ %innerpostiv, %innerloop ], [ %innercount, %innerpreheader ] + %innerpostiv = add i32 %inneriv, 1 + + %ofs2 = sext i32 %inneriv to i64 + %adr2 = getelementptr i8* %address, i64 %ofs2 + store i8 0, i8* %adr2 + + %ofs3 = sext i32 %innerpostiv to i64 + %adr3 = getelementptr i8* %address, i64 %ofs3 + store i8 0, i8* %adr3 + + %innercmp = icmp sgt i32 %limitdec, %innerpostiv + br i1 %innercmp, label %innerloop, label %innerexit + +innerexit: + %innercount.lcssa = phi i32 [ %innerpostiv, %innerloop ] + br label %outermerge + +; CHECK: outermerge: +; +; Eliminate %ofs4 after widening outercount +; CHECK-NOT: sext +; CHECK: getelementptr +; +; TODO: Eliminate %ofs5 after removing lcssa +outermerge: + %innercount.merge = phi i32 [ %innercount.lcssa, %innerexit ], [ %innercount, %innerpreheader ] + + %ofs4 = sext i32 %outercount to i64 + %adr4 = getelementptr i8* %address, i64 %ofs4 + store i8 0, i8* %adr3 + + %ofs5 = sext i32 %innercount.merge to i64 + %adr5 = getelementptr i8* %address, i64 %ofs5 + store i8 0, i8* %adr4 + + %outerpostcount = add i32 %outercount, 1 + %tmp47 = icmp slt i32 %outerpostcount, %limit + br i1 %tmp47, label %outerloop, label %return + +return: + ret void +} diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll new file mode 100644 index 0000000000..c35feef26f --- /dev/null +++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -0,0 +1,123 @@ +; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s +; +; Make sure that indvars isn't inserting canonical IVs. +; This is kinda hard to do until linear function test replacement is removed. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +define i32 @sum(i32* %arr, i32 %n) nounwind { +entry: + %precond = icmp slt i32 0, %n + br i1 %precond, label %ph, label %return + +ph: + br label %loop + +; CHECK: loop: +; +; We should only have 2 IVs. +; CHECK: phi +; CHECK: phi +; CHECK-NOT: phi +; +; sext should be eliminated while preserving gep inboundsness. +; CHECK-NOT: sext +; CHECK: getelementptr inbounds +loop: + %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ] + %s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ] + %ofs = sext i32 %i.02 to i64 + %adr = getelementptr inbounds i32* %arr, i64 %ofs + %val = load i32* %adr + %sinc = add nsw i32 %s.01, %val + %iinc = add nsw i32 %i.02, 1 + %cond = icmp slt i32 %iinc, %n + br i1 %cond, label %loop, label %exit + +exit: + %s.lcssa = phi i32 [ %sinc, %loop ] + br label %return + +return: + %s.0.lcssa = phi i32 [ %s.lcssa, %exit ], [ 0, %entry ] + ret i32 %s.0.lcssa +} + +define i64 @suml(i32* %arr, i32 %n) nounwind { +entry: + %precond = icmp slt i32 0, %n + br i1 %precond, label %ph, label %return + +ph: + br label %loop + +; CHECK: loop: +; +; We should only have 2 IVs. +; CHECK: phi +; CHECK: phi +; CHECK-NOT: phi +; +; %ofs sext should be eliminated while preserving gep inboundsness. +; CHECK-NOT: sext +; CHECK: getelementptr inbounds +; %vall sext should obviously not be eliminated +; CHECK: sext +loop: + %i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ] + %s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ] + %ofs = sext i32 %i.02 to i64 + %adr = getelementptr inbounds i32* %arr, i64 %ofs + %val = load i32* %adr + %vall = sext i32 %val to i64 + %sinc = add nsw i64 %s.01, %vall + %iinc = add nsw i32 %i.02, 1 + %cond = icmp slt i32 %iinc, %n + br i1 %cond, label %loop, label %exit + +exit: + %s.lcssa = phi i64 [ %sinc, %loop ] + br label %return + +return: + %s.0.lcssa = phi i64 [ %s.lcssa, %exit ], [ 0, %entry ] + ret i64 %s.0.lcssa +} + +define void @outofbounds(i32* %first, i32* %last, i32 %idx) nounwind { + %precond = icmp ne i32* %first, %last + br i1 %precond, label %ph, label %return + +; CHECK: ph: +; It's not indvars' job to perform LICM on %ofs +; CHECK-NOT: sext +ph: + br label %loop + +; CHECK: loop: +; +; Preserve exactly one pointer type IV. +; CHECK: phi i32* +; CHECK-NOT: phi +; +; Don't create any extra adds. +; CHECK-NOT: add +; +; Preserve gep inboundsness, and don't factor it. +; CHECK: getelementptr inbounds i32* %ptriv, i32 1 +; CHECK-NOT: add +loop: + %ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ] + %ofs = sext i32 %idx to i64 + %adr = getelementptr inbounds i32* %ptriv, i64 %ofs + store i32 3, i32* %adr + %ptrpost = getelementptr inbounds i32* %ptriv, i32 1 + %cond = icmp ne i32* %ptrpost, %last + br i1 %cond, label %loop, label %exit + +exit: + br label %return + +return: + ret void +} |