diff options
author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-03-14 18:54:36 +0000 |
---|---|---|
committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-03-14 18:54:36 +0000 |
commit | d517da33b712b5d8d687ee5e5974056a4787ec4f (patch) | |
tree | b94e6e12160060e1c44a3b0aebbc8016d5b130fc | |
parent | 862146b6077f017faa2b2113768e723891a06494 (diff) |
LoopVectorize: Invert case when we use a vector cmp value to query select cost
We generate a select with a vectorized condition argument when the condition is
NOT loop invariant. Not the other way around.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177098 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Transforms/Vectorize/LoopVectorize.cpp | 2 | ||||
-rw-r--r-- | test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll | 62 |
2 files changed, 63 insertions, 1 deletion
```diff
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 07dd453424..930d9c412f 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
     const SCEV *CondSCEV = SE->getSCEV(SI->getCondition());
     bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop));
     Type *CondTy = SI->getCondition()->getType();
-    if (ScalarCond)
+    if (!ScalarCond)
       CondTy = VectorType::get(CondTy, VF);
 
     return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy);
diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
new file mode 100644
index 0000000000..d1d23aa92c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -loop-vectorize -mcpu=core2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s
+
+; Make sure we use the right select kind when querying select costs.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+
+; CHECK: Checking a loop in "scalarselect"
+define void @scalarselect(i1 %cond) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1 ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+; A scalar select has a cost of 1 on core2
+; CHECK: cost of 1 for VF 2 {{.*}} select i1 %cond, i32 %6, i32 0
+  %sel = select i1 %cond, i32 %6, i32 zeroinitializer
+  store i32 %sel, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+  ret void
+}
+
+; CHECK: Checking a loop in "vectorselect"
+define void @vectorselect(i1 %cond) nounwind uwtable ssp {
+  br label %1
+
+; <label>:1 ; preds = %1, %0
+  %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+  %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+  %3 = load i32* %2, align 4
+  %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+  %5 = load i32* %4, align 4
+  %6 = add nsw i32 %5, %3
+  %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+  %8 = icmp ult i64 %indvars.iv, 8
+; A vector select has a cost of 4 on core2
+; CHECK: cost of 4 for VF 2 {{.*}} select i1 %8, i32 %6, i32 0
+  %sel = select i1 %8, i32 %6, i32 zeroinitializer
+  store i32 %sel, i32* %7, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+  %exitcond = icmp eq i32 %lftr.wideiv, 256
+  br i1 %exitcond, label %9, label %1
+
+; <label>:9 ; preds = %1
+  ret void
+}
+
```