author     Dan Gohman <gohman@apple.com>   2010-04-23 01:55:05 +0000
committer  Dan Gohman <gohman@apple.com>   2010-04-23 01:55:05 +0000
commit     fafb890ee204d60456d0780ff55a149fa082eaea
tree       3be1fcd844734fe3036a9393c946ceb1c76f4bce
parent     c6863989fc268ee0ff1469e4856e2e9404e67336
Fix LSR to tolerate cases where ScalarEvolution initially misses an
opportunity to fold add operands, but folds them after LSR has separated
them out. This fixes rdar://7886751.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@102157 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--  lib/Analysis/README.txt                        12
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp    5
-rw-r--r--  test/CodeGen/X86/lsr-delayed-fold.ll           28
3 files changed, 44 insertions, 1 deletion
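
Note on the mechanics of the fix: GenerateReassociations pulls one operand out of an add expression and re-folds the remaining operands into an "inner sum" that becomes a base register. Because ScalarEvolution may fold harder the second time around, that inner sum can now collapse to zero, and the patch simply skips such candidates. The following is a minimal standalone sketch of that guard, assuming nothing from LLVM: foldAdd, AddOps, and InnerAddOps are stand-in names, and plain integer addition models SCEV's folding.

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Stand-in for SCEV folding of an add expression: plain integer addition.
// (Hypothetical helper for this sketch only, not an LLVM API.)
static int64_t foldAdd(const std::vector<int64_t> &Ops) {
  int64_t Sum = 0;
  for (int64_t Op : Ops)
    Sum += Op;
  return Sum;
}

int main() {
  // Operands of an add expression after LSR has separated them out, e.g.
  // trunc(-x), trunc(x), -trunc(undef) with x = 7 and undef modeled as 3.
  std::vector<int64_t> AddOps = {-7, 7, -3};

  for (std::size_t J = 0; J != AddOps.size(); ++J) {
    // Collect every operand except the one being pulled into its own register.
    std::vector<int64_t> InnerAddOps;
    for (std::size_t K = 0; K != AddOps.size(); ++K)
      if (K != J)
        InnerAddOps.push_back(AddOps[K]);

    // Re-folding the remainder may cancel terms that were not folded when the
    // expression was first formed; a zero inner sum is useless as a base
    // register, so skip the candidate. This mirrors the new isZero() check.
    int64_t InnerSum = foldAdd(InnerAddOps);
    if (InnerSum == 0) {
      std::cout << "skip operand " << J << ": inner sum folds to zero\n";
      continue;
    }
    std::cout << "operand " << J << ": inner sum = " << InnerSum << "\n";
  }
  return 0;
}

Pulling out the third operand leaves {-7, 7}, which folds to zero; that is the shape of the expression described in the README note and test case below, and the case the new check rejects.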
diff --git a/lib/Analysis/README.txt b/lib/Analysis/README.txt
index c401090272..88ea9f11ad 100644
--- a/lib/Analysis/README.txt
+++ b/lib/Analysis/README.txt
@@ -16,3 +16,15 @@ In addition to being much more complicated, it involves i65 arithmetic,
 which is very inefficient when expanded into code.
 
 //===---------------------------------------------------------------------===//
+
+In test/CodeGen/X86/lsr-delayed-fold.ll,
+
+ScalarEvolution is forming this expression:
+
+((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32)))
+
+This could be folded to
+
+(-1 * (trunc i64 undef to i32))
+
+//===---------------------------------------------------------------------===//
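
For reference, the missed fold is plain cancellation; in the same notation, truncating a sum is the same as summing the truncations, so the first two operands cancel:

  (trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32)
    = (trunc i64 ((-1 * %arg5) + %arg5) to i32)
    = (trunc i64 0 to i32)
    = 0

leaving just (-1 * (trunc i64 undef to i32)). When this cancellation only happens after LSR has separated the operands out, the inner sum rebuilt by LSR can be the zero SCEV, which is what the LoopStrengthReduce.cpp change below guards against.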
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a09bca8997..a09b3dc5f8 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2060,8 +2060,11 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                            LU.Kind, LU.AccessTy, TLI, SE))
         continue;
 
+      const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+      if (InnerSum->isZero())
+        continue;
       Formula F = Base;
-      F.BaseRegs[i] = SE.getAddExpr(InnerAddOps);
+      F.BaseRegs[i] = InnerSum;
       F.BaseRegs.push_back(*J);
       if (InsertFormula(LU, LUIdx, F))
         // If that formula hadn't been seen before, recurse to find more like
diff --git a/test/CodeGen/X86/lsr-delayed-fold.ll b/test/CodeGen/X86/lsr-delayed-fold.ll
new file mode 100644
index 0000000000..f160c2d92b
--- /dev/null
+++ b/test/CodeGen/X86/lsr-delayed-fold.ll
@@ -0,0 +1,28 @@
+; RUN: llc -march=x86-64 < %s > /dev/null
+; rdar://7886751
+
+; ScalarEvolution misses an opportunity to fold ((trunc x) + (trunc -x) + y),
+; but LSR should tolerate this.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0"
+
+define fastcc void @formatValue(i64 %arg5) nounwind {
+bb12: ; preds = %bb11
+ %t = trunc i64 %arg5 to i32 ; <i32> [#uses=1]
+ %t13 = sub i64 0, %arg5 ; <i64> [#uses=1]
+ %t14 = and i64 %t13, 4294967295 ; <i64> [#uses=1]
+ br label %bb15
+
+bb15: ; preds = %bb21, %bb12
+ %t16 = phi i64 [ 0, %bb12 ], [ %t23, %bb15 ] ; <i64> [#uses=2]
+ %t17 = mul i64 %t14, %t16 ; <i64> [#uses=1]
+ %t18 = add i64 undef, %t17 ; <i64> [#uses=1]
+ %t19 = trunc i64 %t18 to i32 ; <i32> [#uses=1]
+ %t22 = icmp eq i32 %t19, %t ; <i1> [#uses=1]
+ %t23 = add i64 %t16, 1 ; <i64> [#uses=1]
+ br i1 %t22, label %bb24, label %bb15
+
+bb24: ; preds = %bb21, %bb11
+ unreachable
+}
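
Note on the test: the RUN line sends llc's output to /dev/null, so this is a compile-only regression test; it passes as long as code generation completes on this input. Assuming a checkout with the file at the path added above, it can be exercised directly with the RUN command, e.g.:

  llc -march=x86-64 < test/CodeGen/X86/lsr-delayed-fold.ll > /dev/null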