diff options
-rw-r--r-- | lib/Transforms/Scalar/IndVarSimplify.cpp | 71 | ||||
-rw-r--r-- | test/Transforms/IndVarSimplify/eliminate-rem.ll | 121 |
2 files changed, 192 insertions, 0 deletions
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 2693f3a9a9..21cb73cbf2 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -98,6 +98,7 @@ namespace { private: void EliminateIVComparisons(); + void EliminateIVRemainders(); void RewriteNonIntegerIVs(Loop *L); ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, @@ -398,6 +399,73 @@ void IndVarSimplify::EliminateIVComparisons() { RecursivelyDeleteTriviallyDeadInstructions(Inst); } +void IndVarSimplify::EliminateIVRemainders() { + SmallVector<WeakVH, 16> DeadInsts; + + // Look for SRem and URem users. + for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { + IVStrideUse &UI = *I; + BinaryOperator *Rem = dyn_cast<BinaryOperator>(UI.getUser()); + if (!Rem) continue; + + bool isSigned = Rem->getOpcode() == Instruction::SRem; + if (!isSigned && Rem->getOpcode() != Instruction::URem) + continue; + + // We're only interested in the case where we know something about + // the numerator. + if (UI.getOperandValToReplace() != Rem->getOperand(0)) + continue; + + // Get the SCEVs for the ICmp operands. + const SCEV *S = SE->getSCEV(Rem->getOperand(0)); + const SCEV *X = SE->getSCEV(Rem->getOperand(1)); + + // Simplify unnecessary loops away. + const Loop *ICmpLoop = LI->getLoopFor(Rem->getParent()); + S = SE->getSCEVAtScope(S, ICmpLoop); + X = SE->getSCEVAtScope(X, ICmpLoop); + + // i % n --> i if i is in [0,n). + if ((!isSigned || SE->isKnownNonNegative(S)) && + SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + S, X)) + Rem->replaceAllUsesWith(Rem->getOperand(0)); + else { + // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). + const SCEV *LessOne = + SE->getMinusSCEV(S, SE->getIntegerSCEV(1, S->getType())); + if ((!isSigned || SE->isKnownNonNegative(LessOne)) && + SE->isKnownPredicate(isSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, + LessOne, X)) { + ICmpInst *ICmp = new ICmpInst(Rem, ICmpInst::ICMP_EQ, + Rem->getOperand(0), Rem->getOperand(1), + "tmp"); + SelectInst *Sel = + SelectInst::Create(ICmp, + ConstantInt::get(Rem->getType(), 0), + Rem->getOperand(0), "tmp", Rem); + Rem->replaceAllUsesWith(Sel); + } else + continue; + } + + // Inform IVUsers about the new users. + if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0))) + IU->AddUsersIfInteresting(I); + + DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); + DeadInsts.push_back(Rem); + } + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); +} + bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { IU = &getAnalysis<IVUsers>(); LI = &getAnalysis<LoopInfo>(); @@ -427,6 +495,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Simplify ICmp IV users. EliminateIVComparisons(); + // Simplify SRem and URem IV users. + EliminateIVRemainders(); + // Compute the type of the largest recurrence expression, and decide whether // a canonical induction variable should be inserted. const Type *LargestType = 0; diff --git a/test/Transforms/IndVarSimplify/eliminate-rem.ll b/test/Transforms/IndVarSimplify/eliminate-rem.ll new file mode 100644 index 0000000000..f756389398 --- /dev/null +++ b/test/Transforms/IndVarSimplify/eliminate-rem.ll @@ -0,0 +1,121 @@ +; RUN: opt -indvars -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; Indvars should be able to eliminate this srem. +; CHECK: @simple +; CHECK-NOT: rem +; CHECK: ret + +define void @simple(i64 %arg, double* %arg3) nounwind { +bb: + %t = icmp slt i64 0, %arg ; <i1> [#uses=1] + br i1 %t, label %bb4, label %bb12 + +bb4: ; preds = %bb + br label %bb5 + +bb5: ; preds = %bb4, %bb5 + %t6 = phi i64 [ %t9, %bb5 ], [ 0, %bb4 ] ; <i64> [#uses=2] + %t7 = srem i64 %t6, %arg ; <i64> [#uses=1] + %t8 = getelementptr inbounds double* %arg3, i64 %t7 ; <double*> [#uses=1] + store double 0.000000e+00, double* %t8 + %t9 = add nsw i64 %t6, 1 ; <i64> [#uses=2] + %t10 = icmp slt i64 %t9, %arg ; <i1> [#uses=1] + br i1 %t10, label %bb5, label %bb11 + +bb11: ; preds = %bb5 + br label %bb12 + +bb12: ; preds = %bb11, %bb + ret void +} + +; Indvars should be able to eliminate the (i+1)%n. +; CHECK: @f +; CHECK-NOT: rem +; CHECK: rem +; CHECK-NOT: rem +; CHECK: ret + +define i32 @f(i64* %arg, i64 %arg1, i64 %arg2, i64 %arg3) nounwind { +bb: + %t = icmp sgt i64 %arg1, 0 ; <i1> [#uses=1] + br i1 %t, label %bb4, label %bb54 + +bb4: ; preds = %bb + br label %bb5 + +bb5: ; preds = %bb49, %bb4 + %t6 = phi i64 [ %t51, %bb49 ], [ 0, %bb4 ] ; <i64> [#uses=4] + %t7 = phi i32 [ %t50, %bb49 ], [ 0, %bb4 ] ; <i32> [#uses=2] + %t8 = add nsw i64 %t6, %arg1 ; <i64> [#uses=1] + %t9 = add nsw i64 %t8, -2 ; <i64> [#uses=1] + %t10 = srem i64 %t9, %arg1 ; <i64> [#uses=1] + %t11 = add nsw i64 %t10, 1 ; <i64> [#uses=1] + %t12 = add nsw i64 %t6, 1 ; <i64> [#uses=1] + %t13 = srem i64 %t12, %arg1 ; <i64> [#uses=1] + %t14 = icmp sgt i64 %arg1, 0 ; <i1> [#uses=1] + br i1 %t14, label %bb15, label %bb49 + +bb15: ; preds = %bb5 + br label %bb16 + +bb16: ; preds = %bb44, %bb15 + %t17 = phi i64 [ %t46, %bb44 ], [ 0, %bb15 ] ; <i64> [#uses=1] + %t18 = phi i32 [ %t45, %bb44 ], [ %t7, %bb15 ] ; <i32> [#uses=2] + %t19 = icmp sgt i64 %arg1, 0 ; <i1> [#uses=1] + br i1 %t19, label %bb20, label %bb44 + +bb20: ; preds = %bb16 + br label %bb21 + +bb21: ; preds = %bb21, %bb20 + %t22 = phi i64 [ %t41, %bb21 ], [ 0, %bb20 ] ; <i64> [#uses=4] + %t23 = phi i32 [ %t40, %bb21 ], [ %t18, %bb20 ] ; <i32> [#uses=1] + %t24 = mul i64 %t6, %arg1 ; <i64> [#uses=1] + %t25 = mul i64 %t13, %arg1 ; <i64> [#uses=1] + %t26 = add nsw i64 %t24, %t22 ; <i64> [#uses=1] + %t27 = mul i64 %t11, %arg1 ; <i64> [#uses=1] + %t28 = add nsw i64 %t25, %t22 ; <i64> [#uses=1] + %t29 = getelementptr inbounds i64* %arg, i64 %t26 ; <i64*> [#uses=1] + %t30 = add nsw i64 %t27, %t22 ; <i64> [#uses=1] + %t31 = getelementptr inbounds i64* %arg, i64 %t28 ; <i64*> [#uses=1] + %t32 = zext i32 %t23 to i64 ; <i64> [#uses=1] + %t33 = load i64* %t29 ; <i64> [#uses=1] + %t34 = getelementptr inbounds i64* %arg, i64 %t30 ; <i64*> [#uses=1] + %t35 = load i64* %t31 ; <i64> [#uses=1] + %t36 = add nsw i64 %t32, %t33 ; <i64> [#uses=1] + %t37 = add nsw i64 %t36, %t35 ; <i64> [#uses=1] + %t38 = load i64* %t34 ; <i64> [#uses=1] + %t39 = add nsw i64 %t37, %t38 ; <i64> [#uses=1] + %t40 = trunc i64 %t39 to i32 ; <i32> [#uses=2] + %t41 = add nsw i64 %t22, 1 ; <i64> [#uses=2] + %t42 = icmp slt i64 %t41, %arg1 ; <i1> [#uses=1] + br i1 %t42, label %bb21, label %bb43 + +bb43: ; preds = %bb21 + br label %bb44 + +bb44: ; preds = %bb43, %bb16 + %t45 = phi i32 [ %t18, %bb16 ], [ %t40, %bb43 ] ; <i32> [#uses=2] + %t46 = add nsw i64 %t17, 1 ; <i64> [#uses=2] + %t47 = icmp slt i64 %t46, %arg1 ; <i1> [#uses=1] + br i1 %t47, label %bb16, label %bb48 + +bb48: ; preds = %bb44 + br label %bb49 + +bb49: ; preds = %bb48, %bb5 + %t50 = phi i32 [ %t7, %bb5 ], [ %t45, %bb48 ] ; <i32> [#uses=2] + %t51 = add nsw i64 %t6, 1 ; <i64> [#uses=2] + %t52 = icmp slt i64 %t51, %arg1 ; <i1> [#uses=1] + br i1 %t52, label %bb5, label %bb53 + +bb53: ; preds = %bb49 + br label %bb54 + +bb54: ; preds = %bb53, %bb + %t55 = phi i32 [ 0, %bb ], [ %t50, %bb53 ] ; <i32> [#uses=1] + ret i32 %t55 +} |