aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNadav Rotem <nrotem@apple.com>2012-12-11 18:58:10 +0000
committerNadav Rotem <nrotem@apple.com>2012-12-11 18:58:10 +0000
commit5e9efa10fc7eea582bb1ad216364fb2c467e4477 (patch)
tree33a68bbe3bbfcb608a9f31cd49d104ed5929e5af
parent37a9830d20018dee5557a3a72f750d965ba383c5 (diff)
Loop Vectorize: optimize the vectorization of trunc(induction_var). The truncation is now done on scalars.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169904 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp14
-rw-r--r--test/Transforms/LoopVectorize/cast-induction.ll30
-rw-r--r--test/Transforms/LoopVectorize/cpp-new-array.ll2
-rw-r--r--test/Transforms/LoopVectorize/gcc-examples.ll2
-rw-r--r--test/Transforms/LoopVectorize/induction_plus.ll3
5 files changed, 46 insertions, 5 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index a69124032b..9550e8a04c 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- /// Vectorize bitcasts.
CastInst *CI = dyn_cast<CastInst>(it);
+ /// Optimize the special case where the source is the induction
+ /// variable. Notice that we can only optimize the 'trunc' case
+ /// because: a. FP conversions lose precision, b. sext/zext may wrap,
+ /// c. other casts depend on pointer size.
+ if (CI->getOperand(0) == OldInduction &&
+ it->getOpcode() == Instruction::Trunc) {
+ Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
+ CI->getType());
+ Value *Broadcasted = getBroadcastInstrs(ScalarCast);
+ WidenMap[it] = getConsecutiveVector(Broadcasted);
+ break;
+ }
+ /// Vectorize casts.
Value *A = getVectorValue(it->getOperand(0));
Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF);
WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy);
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
new file mode 100644
index 0000000000..fc8281ba8a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+; rdar://problem/12848162
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example12
+;CHECK: trunc i64
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example12() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = trunc i64 %indvars.iv to i32
+ store i32 %3, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %4, label %1
+
+; <label>:4 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
index 26902eba9e..af59963899 100644
--- a/test/Transforms/LoopVectorize/cpp-new-array.ll
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
;CHECK: @cpp_new_arrays
-;CHECK: insertelement <4 x i32>
+;CHECK: sext i32
;CHECK: load <4 x float>
;CHECK: fadd <4 x float>
;CHECK: ret i32
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index f1bf6cb6d8..4fabc8e2d3 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -329,7 +329,7 @@ define void @example11() nounwind uwtable ssp {
}
;CHECK: @example12
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
;CHECK: store <4 x i32>
;CHECK: ret void
define void @example12() nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index b31bceb50d..a25845f1c1 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@array = common global [1024 x i32] zeroinitializer, align 16
;CHECK: @array_at_plus_one
-;CHECK: add <4 x i64>
-;CHECK: trunc <4 x i64>
+;CHECK: trunc i64
;CHECK: add i64 %index, 12
;CHECK: ret i32
define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {