diff options
author | Nadav Rotem <nrotem@apple.com> | 2012-12-11 18:58:10 +0000 |
---|---|---|
committer | Nadav Rotem <nrotem@apple.com> | 2012-12-11 18:58:10 +0000 |
commit | 5e9efa10fc7eea582bb1ad216364fb2c467e4477 (patch) | |
tree | 33a68bbe3bbfcb608a9f31cd49d104ed5929e5af | |
parent | 37a9830d20018dee5557a3a72f750d965ba383c5 (diff) |
Loop Vectorize: optimize the vectorization of trunc(induction_var). The truncation is now done on scalars.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169904 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 14 |
-rw-r--r-- | test/Transforms/LoopVectorize/cast-induction.ll | 30 |
-rw-r--r-- | test/Transforms/LoopVectorize/cpp-new-array.ll | 2 |
-rw-r--r-- | test/Transforms/LoopVectorize/gcc-examples.ll | 2 |
-rw-r--r-- | test/Transforms/LoopVectorize/induction_plus.ll | 3 |
5 files changed, 46 insertions, 5 deletions
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a69124032b..9550e8a04c 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1204,8 +1204,20 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - /// Vectorize bitcasts. CastInst *CI = dyn_cast<CastInst>(it); + /// Optimize the special case where the source is the induction + /// variable. Notice that we can only optimize the 'trunc' case + /// because: a. FP conversions lose precision, b. sext/zext may wrap, + /// c. other casts depend on pointer size. + if (CI->getOperand(0) == OldInduction && + it->getOpcode() == Instruction::Trunc) { + Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, + CI->getType()); + Value *Broadcasted = getBroadcastInstrs(ScalarCast); + WidenMap[it] = getConsecutiveVector(Broadcasted); + break; + } + /// Vectorize casts. 
Value *A = getVectorValue(it->getOperand(0)); Type *DestTy = VectorType::get(CI->getType()->getScalarType(), VF); WidenMap[it] = Builder.CreateCast(CI->getOpcode(), A, DestTy); diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll new file mode 100644 index 0000000000..fc8281ba8a --- /dev/null +++ b/test/Transforms/LoopVectorize/cast-induction.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s + +; rdar://problem/12848162 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 + +;CHECK: @example12 +;CHECK: trunc i64 +;CHECK: store <4 x i32> +;CHECK: ret void +define void @example12() nounwind uwtable ssp { + br label %1 + +; <label>:1 ; preds = %1, %0 + %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ] + %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv + %3 = trunc i64 %indvars.iv to i32 + store i32 %3, i32* %2, align 4 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 1024 + br i1 %exitcond, label %4, label %1 + +; <label>:4 ; preds = %1 + ret void +} + diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll index 26902eba9e..af59963899 100644 --- a/test/Transforms/LoopVectorize/cpp-new-array.ll +++ b/test/Transforms/LoopVectorize/cpp-new-array.ll @@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-macosx10.8.0" ;CHECK: @cpp_new_arrays -;CHECK: insertelement <4 x i32> +;CHECK: sext i32 ;CHECK: load <4 x float> ;CHECK: fadd <4 x float> ;CHECK: ret i32 diff --git 
a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll index f1bf6cb6d8..4fabc8e2d3 100644 --- a/test/Transforms/LoopVectorize/gcc-examples.ll +++ b/test/Transforms/LoopVectorize/gcc-examples.ll @@ -329,7 +329,7 @@ define void @example11() nounwind uwtable ssp { } ;CHECK: @example12 -;CHECK: trunc <4 x i64> +;CHECK: trunc i64 ;CHECK: store <4 x i32> ;CHECK: ret void define void @example12() nounwind uwtable ssp { diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll index b31bceb50d..a25845f1c1 100644 --- a/test/Transforms/LoopVectorize/induction_plus.ll +++ b/test/Transforms/LoopVectorize/induction_plus.ll @@ -6,8 +6,7 @@ target triple = "x86_64-apple-macosx10.8.0" @array = common global [1024 x i32] zeroinitializer, align 16 ;CHECK: @array_at_plus_one -;CHECK: add <4 x i64> -;CHECK: trunc <4 x i64> +;CHECK: trunc i64 ;CHECK: add i64 %index, 12 ;CHECK: ret i32 define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp { |