aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDan Gohman <gohman@apple.com>2010-04-13 16:51:03 +0000
committerDan Gohman <gohman@apple.com>2010-04-13 16:51:03 +0000
commit28287794d4e9e1e07742f8a937cfc2be8af627c6 (patch)
tree77ae45cdc77d3c475a8d8c28bff2a7971c413103
parent3ab131243e0150854df2f94ea3f472cb7ca00236 (diff)
Teach ScalarEvolution to simplify smax and umax when it can prove
that one operand is always greater than another. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101142 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Analysis/ScalarEvolution.cpp16
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-max.ll52
2 files changed, 66 insertions, 2 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 307c83a2e1..a9a444f93a 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -2117,7 +2117,13 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- if (Ops[i] == Ops[i+1]) { // X smax Y smax Y --> X smax Y
+ // X smax Y smax Y --> X smax Y
+ // X smax Y --> X, if X is always greater than Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--i; --e;
}
@@ -2216,7 +2222,13 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
// so, delete one. Since we sorted the list, these values are required to
// be adjacent.
for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
- if (Ops[i] == Ops[i+1]) { // X umax Y umax Y --> X umax Y
+ // X umax Y umax Y --> X umax Y
+ // X umax Y --> X, if X is always greater than Y
+ if (Ops[i] == Ops[i+1] ||
+ isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) {
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+ --i; --e;
+ } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) {
Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--i; --e;
}
diff --git a/test/Transforms/IndVarSimplify/eliminate-max.ll b/test/Transforms/IndVarSimplify/eliminate-max.ll
new file mode 100644
index 0000000000..c25bd0e354
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/eliminate-max.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -S -indvars | grep {= icmp} | count 3
+; PR4914.ll
+
+; Indvars should be able to do range analysis and eliminate icmps.
+; There are two here which cannot be eliminated.
+; There's one that icmp which can be eliminated and which indvars currently
+; cannot eliminate, because it requires analyzing more than just the
+; range of the induction variable.
+
+@0 = private constant [4 x i8] c"%d\0A\00", align 1 ; <[4 x i8]*> [#uses=1]
+
+define i32 @main() nounwind {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb14, %bb
+ %t = phi i32 [ 0, %bb ], [ %t19, %bb14 ] ; <i32> [#uses=5]
+ %t2 = phi i32 [ 0, %bb ], [ %t18, %bb14 ] ; <i32> [#uses=1]
+ %t3 = icmp slt i32 %t, 0 ; <i1> [#uses=1]
+ br i1 %t3, label %bb7, label %bb4
+
+bb4: ; preds = %bb1
+ %t5 = icmp sgt i32 %t, 255 ; <i1> [#uses=1]
+ %t6 = select i1 %t5, i32 255, i32 %t ; <i32> [#uses=1]
+ br label %bb7
+
+bb7: ; preds = %bb4, %bb1
+ %t8 = phi i32 [ %t6, %bb4 ], [ 0, %bb1 ] ; <i32> [#uses=1]
+ %t9 = sub i32 0, %t ; <i32> [#uses=3]
+ %t10 = icmp slt i32 %t9, 0 ; <i1> [#uses=1]
+ br i1 %t10, label %bb14, label %bb11
+
+bb11: ; preds = %bb7
+ %t12 = icmp sgt i32 %t9, 255 ; <i1> [#uses=1]
+ %t13 = select i1 %t12, i32 255, i32 %t9 ; <i32> [#uses=1]
+ br label %bb14
+
+bb14: ; preds = %bb11, %bb7
+ %t15 = phi i32 [ %t13, %bb11 ], [ 0, %bb7 ] ; <i32> [#uses=1]
+ %t16 = add nsw i32 %t2, 255 ; <i32> [#uses=1]
+ %t17 = add nsw i32 %t16, %t8 ; <i32> [#uses=1]
+ %t18 = add nsw i32 %t17, %t15 ; <i32> [#uses=2]
+ %t19 = add nsw i32 %t, 1 ; <i32> [#uses=2]
+ %t20 = icmp slt i32 %t19, 1000000000 ; <i1> [#uses=1]
+ br i1 %t20, label %bb1, label %bb21
+
+bb21: ; preds = %bb14
+ %t22 = call i32 (i8*, ...)* @printf(i8* noalias getelementptr inbounds ([4 x i8]* @0, i32 0, i32 0), i32 %t18) nounwind
+ ret i32 0
+}
+
+declare i32 @printf(i8* noalias nocapture, ...) nounwind