From 5d79bb8770a5a655af0dccc87b952c3ea9bad45e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 1 Mar 2013 19:07:31 +0000 Subject: LoopVectorize: Don't hang forever if a PHI only has skipped PHI uses. Fixes PR15384. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176366 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index a696a2ffba..ef9c7c9db2 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2724,6 +2724,9 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, // Is this a bin op ? FoundBinOp |= !isa<PHINode>(Iter); + // Remember the current instruction. + Instruction *OldIter = Iter; + // For each of the *users* of iter. for (Value::use_iterator it = Iter->use_begin(), e = Iter->use_end(); it != e; ++it) { @@ -2749,7 +2752,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, if (isa<PHINode>(Iter) && isa<PHINode>(U) && U->getParent() != TheLoop->getHeader() && TheLoop->contains(U) && - Iter->getNumUses() > 1) + Iter->hasNUsesOrMore(2)) continue; // We can't have multiple inside users. @@ -2769,6 +2772,10 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi, Iter = U; } + // If all uses were skipped this can't be a reduction variable. + if (Iter == OldIter) + return false; + // We found a reduction var if we have reached the original // phi node and we only have a single instruction with out-of-loop // users. -- cgit v1.2.3-70-g09d2 From 5290baacb8ca4fb75d798e873a441cad11cbfb2c Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 2 Mar 2013 01:33:49 +0000 Subject: PR14448 - prevent the loop vectorizer from vectorizing the same loop twice. The LoopVectorizer often runs multiple times on the same function due to inlining. 
When this happens the loop vectorizer often vectorizes the same loops multiple times, increasing code size and adding unneeded branches. With this patch, the vectorizer during vectorization puts metadata on scalar loops and marks them as 'already vectorized' so that it knows to ignore them when it sees them a second time. PR14448. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 18 ++++++ test/Transforms/LoopVectorize/vectorize-once.ll | 75 +++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 test/Transforms/LoopVectorize/vectorize-once.ll (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index ef9c7c9db2..0d11372808 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -116,6 +116,12 @@ static const unsigned TinyTripCountUnrollThreshold = 128; /// number of pointers. Notice that the check is quadratic! static const unsigned RuntimeMemoryCheckThreshold = 4; +/// We use a metadata with this name to indicate that a scalar loop was +/// vectorized and that we don't need to re-vectorize it if we run into it +/// again. +static const char* +AlreadyVectorizedMDName = "llvm.vectorizer.already_vectorized"; + namespace { // Forward declarations. @@ -1159,6 +1165,11 @@ InnerLoopVectorizer::createEmptyLoop(LoopVectorizationLegality *Legal) { BasicBlock *ExitBlock = OrigLoop->getExitBlock(); assert(ExitBlock && "Must have an exit block"); + // Mark the old scalar loop with metadata that tells us not to vectorize this + // loop again if we run into it. + MDNode *MD = MDNode::get(OldBasicBlock->getContext(), ArrayRef<Value*>()); + OldBasicBlock->getTerminator()->setMetadata(AlreadyVectorizedMDName, MD); + // Some loops have a single integer induction variable, while other loops // don't. 
One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we @@ -2224,6 +2235,13 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); + // If we marked the scalar loop as "already vectorized" then no need + // to vectorize it again. + if (Header->getTerminator()->getMetadata(AlreadyVectorizedMDName)) { + DEBUG(dbgs() << "LV: This loop was vectorized before\n"); + return false; + } + // For each block in the loop. for (Loop::block_iterator bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; ++bb) { diff --git a/test/Transforms/LoopVectorize/vectorize-once.ll b/test/Transforms/LoopVectorize/vectorize-once.ll new file mode 100644 index 0000000000..ac1694802a --- /dev/null +++ b/test/Transforms/LoopVectorize/vectorize-once.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S -simplifycfg | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; +; We want to make sure that we are vectorizing the scalar loop only once +; even if the pass manager runs the vectorizer multiple times due to inlining. 
+ + +; This test checks that we add metadata to vectorized loops +; CHECK: _Z4foo1Pii +; CHECK: <4 x i32> +; CHECK: llvm.vectorizer.already_vectorized +; CHECK: ret + +; This test comes from the loop: +; +;int foo (int *A, int n) { +; return std::accumulate(A, A + n, 0); +;} +define i32 @_Z4foo1Pii(i32* %A, i32 %n) #0 { +entry: + %idx.ext = sext i32 %n to i64 + %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext + %cmp3.i = icmp eq i32 %n, 0 + br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i + +for.body.i: ; preds = %entry, %for.body.i + %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] + %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] + %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0 + %add.i = add nsw i32 %0, %__init.addr.05.i + %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 + %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr + br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i + +_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry + %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] + ret i32 %__init.addr.0.lcssa.i +} + +; This test checks that we don't vectorize loops that are marked with the "already vectorized" metadata. 
+; CHECK: _Z4foo2Pii +; CHECK-NOT: <4 x i32> +; CHECK: llvm.vectorizer.already_vectorized +; CHECK: ret +define i32 @_Z4foo2Pii(i32* %A, i32 %n) #0 { +entry: + %idx.ext = sext i32 %n to i64 + %add.ptr = getelementptr inbounds i32* %A, i64 %idx.ext + %cmp3.i = icmp eq i32 %n, 0 + br i1 %cmp3.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i + +for.body.i: ; preds = %entry, %for.body.i + %__init.addr.05.i = phi i32 [ %add.i, %for.body.i ], [ 0, %entry ] + %__first.addr.04.i = phi i32* [ %incdec.ptr.i, %for.body.i ], [ %A, %entry ] + %0 = load i32* %__first.addr.04.i, align 4, !tbaa !0 + %add.i = add nsw i32 %0, %__init.addr.05.i + %incdec.ptr.i = getelementptr inbounds i32* %__first.addr.04.i, i64 1 + %cmp.i = icmp eq i32* %incdec.ptr.i, %add.ptr + br i1 %cmp.i, label %_ZSt10accumulateIPiiET0_T_S2_S1_.exit, label %for.body.i, !llvm.vectorizer.already_vectorized !3 + +_ZSt10accumulateIPiiET0_T_S2_S1_.exit: ; preds = %for.body.i, %entry + %__init.addr.0.lcssa.i = phi i32 [ 0, %entry ], [ %add.i, %for.body.i ] + ret i32 %__init.addr.0.lcssa.i +} + +attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA"} +!3 = metadata !{} + -- cgit v1.2.3-70-g09d2 From f22d9cfa6d145ce26930804cc3ac54340f49c38b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 8 Mar 2013 16:58:37 +0000 Subject: Insert the reduction start value into the first bypass block to preserve domination. Fixes PR15344. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176701 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- .../LoopVectorize/X86/reduction-crash.ll | 35 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopVectorize/X86/reduction-crash.ll (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 0d11372808..11f4b02204 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1643,7 +1643,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) { // To do so, we need to generate the 'identity' vector and override // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); // This is the vector-clone of the value that leaves the loop. 
VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); diff --git a/test/Transforms/LoopVectorize/X86/reduction-crash.ll b/test/Transforms/LoopVectorize/X86/reduction-crash.ll new file mode 100644 index 0000000000..44702c8dcf --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/reduction-crash.ll @@ -0,0 +1,35 @@ +; RUN: opt -S -loop-vectorize < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-apple-darwin" + +; PR15344 +define void @test1(float* nocapture %arg, i32 %arg1) nounwind { +; CHECK: @test1 +; CHECK: preheader +; CHECK: insertelement <2 x double> zeroinitializer, double %tmp, i32 0 +; CHECK: vector.memcheck + +bb: + br label %bb2 + +bb2: ; preds = %bb + %tmp = load double* null, align 8 + br i1 undef, label %bb3, label %bb12 + +bb3: ; preds = %bb3, %bb2 + %tmp4 = phi double [ %tmp9, %bb3 ], [ %tmp, %bb2 ] + %tmp5 = phi i32 [ %tmp8, %bb3 ], [ 0, %bb2 ] + %tmp6 = getelementptr inbounds [16 x double]* undef, i32 0, i32 %tmp5 + %tmp7 = load double* %tmp6, align 4 + %tmp8 = add nsw i32 %tmp5, 1 + %tmp9 = fadd fast double %tmp4, undef + %tmp10 = getelementptr inbounds float* %arg, i32 %tmp5 + store float undef, float* %tmp10, align 4 + %tmp11 = icmp eq i32 %tmp8, %arg1 + br i1 %tmp11, label %bb12, label %bb3 + +bb12: ; preds = %bb3, %bb2 + %tmp13 = phi double [ %tmp, %bb2 ], [ %tmp9, %bb3 ] + ret void +} -- cgit v1.2.3-70-g09d2 From 56ee544a3af2b019329f06422c00e8a3645b895c Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 9 Mar 2013 15:56:34 +0000 Subject: LoopVectorizer: Ignore dbg.value instructions We want vectorization to happen at -g. Ignore calls to the dbg.value intrinsic and don't transfer them to the vectorized code. 
radar://13378964 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176768 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 13 +++++- test/Transforms/LoopVectorize/dbg.value.ll | 70 ++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 test/Transforms/LoopVectorize/dbg.value.ll (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 11f4b02204..e1f2932231 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2088,6 +2088,10 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case Instruction::Call: { + // Ignore dbg.value instructions. + if (isa<DbgValueInst>(it)) + break; + Module *M = BB->getParent()->getParent(); CallInst *CI = cast<CallInst>(it); Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI); @@ -2324,9 +2328,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; }// end of PHI handling - // We still don't handle functions. + // We still don't handle functions. However, we can ignore dbg.value + // calls and we do handle certain intrinsic and libm functions. CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !getIntrinsicIDForCall(CI, TLI)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgValueInst>(CI)) { DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } @@ -3263,6 +3268,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip dbg.value instructions. 
+ if (isa<DbgValueInst>(it)) + continue; + unsigned C = getInstructionCost(it, VF); Cost += C; DEBUG(dbgs() << "LV: Found an estimated cost of "<< C <<" for VF " << diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll new file mode 100644 index 0000000000..a2ea9511bb --- /dev/null +++ b/test/Transforms/LoopVectorize/dbg.value.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -S -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine | FileCheck %s +; Make sure we vectorize with debugging turned on. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@A = global [1024 x i32] zeroinitializer, align 16 +@B = global [1024 x i32] zeroinitializer, align 16 +@C = global [1024 x i32] zeroinitializer, align 16 + +; CHECK: @test +define i32 @test() #0 { +entry: + tail call void @llvm.dbg.value(metadata !1, i64 0, metadata !9), !dbg !18 + br label %for.body, !dbg !18 + +for.body: + ;CHECK: load <4 x i32> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [1024 x i32]* @B, i64 0, i64 %indvars.iv, !dbg !19 + %0 = load i32* %arrayidx, align 4, !dbg !19, !tbaa !21 + %arrayidx2 = getelementptr inbounds [1024 x i32]* @C, i64 0, i64 %indvars.iv, !dbg !19 + %1 = load i32* %arrayidx2, align 4, !dbg !19, !tbaa !21 + %add = add nsw i32 %1, %0, !dbg !19 + %arrayidx4 = getelementptr inbounds [1024 x i32]* @A, i64 0, i64 %indvars.iv, !dbg !19 + store i32 %add, i32* %arrayidx4, align 4, !dbg !19, !tbaa !21 + %indvars.iv.next = add i64 %indvars.iv, 1, !dbg !18 + tail call void @llvm.dbg.value(metadata !{null}, i64 0, metadata !9), !dbg !18 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !18 + %exitcond = icmp ne i32 %lftr.wideiv, 1024, !dbg !18 + br i1 %exitcond, label %for.body, label %for.end, !dbg !18 + +for.end: + 
ret i32 0, !dbg !24 +} + +declare void @llvm.dbg.declare(metadata, metadata) #1 + +declare void @llvm.dbg.value(metadata, i64, metadata) #1 + +attributes #0 = { nounwind ssp uwtable "fp-contract-model"="standard" "no-frame-pointer-elim" "no-frame-pointer-elim-non-leaf" "realign-stack" "relocation-model"="pic" "ssp-buffers-size"="8" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} + +!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test", metadata !"/path/to/somewhere", metadata !"clang", i1 true, i1 true, metadata !"", i32 0, metadata !1, metadata !1, metadata !2, metadata !11, metadata !""} +!1 = metadata !{i32 0} +!2 = metadata !{metadata !3} +!3 = metadata !{i32 786478, i32 0, metadata !4, metadata !"test", metadata !"test", metadata !"test", metadata !4, i32 5, metadata !5, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 ()* @test, null, null, metadata !8, i32 5} +!4 = metadata !{i32 786473, metadata !"test", metadata !"/path/to/somewhere", null} +!5 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, i32 0} +!6 = metadata !{metadata !7} +!7 = metadata !{i32 786468, null, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} +!8 = metadata !{metadata !9} +!9 = metadata !{i32 786688, metadata !10, metadata !"i", metadata !4, i32 6, metadata !7, i32 0, i32 0} +!10 = metadata !{i32 786443, metadata !3, i32 6, i32 0, metadata !4, i32 0} +!11 = metadata !{metadata !12, metadata !16, metadata !17} +!12 = metadata !{i32 786484, i32 0, null, metadata !"A", metadata !"A", metadata !"", metadata !4, i32 1, metadata !13, i32 0, i32 1, [1024 x i32]* @A, null} +!13 = metadata !{i32 786433, null, metadata !"", null, i32 0, i64 32768, i64 32, i32 0, i32 0, metadata !7, metadata !14, i32 0, i32 0} +!14 = metadata !{metadata !15} +!15 = metadata !{i32 786465, i64 0, i64 1024} +!16 = metadata !{i32 786484, i32 0, null, metadata !"B", metadata !"B", metadata !"", metadata 
!4, i32 2, metadata !13, i32 0, i32 1, [1024 x i32]* @B, null} +!17 = metadata !{i32 786484, i32 0, null, metadata !"C", metadata !"C", metadata !"", metadata !4, i32 3, metadata !13, i32 0, i32 1, [1024 x i32]* @C, null} +!18 = metadata !{i32 6, i32 0, metadata !10, null} +!19 = metadata !{i32 7, i32 0, metadata !20, null} +!20 = metadata !{i32 786443, metadata !10, i32 6, i32 0, metadata !4, i32 1} +!21 = metadata !{metadata !"int", metadata !22} +!22 = metadata !{metadata !"omnipotent char", metadata !23} +!23 = metadata !{metadata !"Simple C/C++ TBAA"} +!24 = metadata !{i32 9, i32 0, metadata !3, null} -- cgit v1.2.3-70-g09d2 From 738295e4570f502360c11bc51843f5a8516a9526 Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Sat, 9 Mar 2013 16:27:27 +0000 Subject: LoopVectorizer: Ignore all dbg intrinsics Ignore all DbgInfoIntrinsic instructions instead of just DbgValueInst. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176769 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index e1f2932231..3da0f5d210 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2088,8 +2088,8 @@ InnerLoopVectorizer::vectorizeBlockInLoop(LoopVectorizationLegality *Legal, } case Instruction::Call: { - // Ignore dbg.value instructions. - if (isa<DbgValueInst>(it)) + // Ignore dbg intrinsics. + if (isa<DbgInfoIntrinsic>(it)) break; Module *M = BB->getParent()->getParent(); @@ -2328,10 +2328,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { return false; }// end of PHI handling - // We still don't handle functions. However, we can ignore dbg.value + // We still don't handle functions. However, we can ignore dbg intrinsic // calls and we do handle certain intrinsic and libm functions. 
CallInst *CI = dyn_cast<CallInst>(it); - if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgValueInst>(CI)) { + if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } @@ -3268,8 +3268,8 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // Skip dbg.value instructions. - if (isa<DbgValueInst>(it)) + // Skip dbg intrinsics. + if (isa<DbgInfoIntrinsic>(it)) continue; unsigned C = getInstructionCost(it, VF); -- cgit v1.2.3-70-g09d2 From 0d932717d8d22b0e747b15fddea2c718043e4d51 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 9 Mar 2013 19:22:40 +0000 Subject: Remove a source of nondeterminism from the LoopVectorizer. This made us emit runtime checks in a random order. Hopefully bootstrap miscompares will go away now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@176775 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 3da0f5d210..07dd453424 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -419,7 +419,7 @@ public: /// Alias(Multi)Map stores the values (GEPs or underlying objects and their /// respective Store/Load instruction(s) to calculate aliasing. - typedef DenseMap<Value*, Instruction* > AliasMap; + typedef MapVector<Value*, Instruction* > AliasMap; typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap; /// Returns true if it is legal to vectorize this loop. 
-- cgit v1.2.3-70-g09d2 From d517da33b712b5d8d687ee5e5974056a4787ec4f Mon Sep 17 00:00:00 2001 From: Arnold Schwaighofer Date: Thu, 14 Mar 2013 18:54:36 +0000 Subject: LoopVectorize: Invert case when we use a vector cmp value to query select cost We generate a select with a vectorized condition argument when the condition is NOT loop invariant. Not the other way around. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177098 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- .../LoopVectorize/X86/vector-scalar-select-cost.ll | 62 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll (limited to 'lib/Transforms/Vectorize/LoopVectorize.cpp') diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 07dd453424..930d9c412f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3338,7 +3338,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { const SCEV *CondSCEV = SE->getSCEV(SI->getCondition()); bool ScalarCond = (SE->isLoopInvariant(CondSCEV, TheLoop)); Type *CondTy = SI->getCondition()->getType(); - if (ScalarCond) + if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy); diff --git a/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll new file mode 100644 index 0000000000..d1d23aa92c --- /dev/null +++ b/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -0,0 +1,62 @@ +; RUN: opt < %s -loop-vectorize -mcpu=core2 -debug-only=loop-vectorize 2>&1 -S | FileCheck %s + +; Make sure we use the right select kind when querying select costs. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +@a = common global [2048 x i32] zeroinitializer, align 16 +@b = common global [2048 x i32] zeroinitializer, align 16 +@c = common global [2048 x i32] zeroinitializer, align 16 + +; CHECK: Checking a loop in "scalarselect" +define void @scalarselect(i1 %cond) nounwind uwtable ssp { + br label %1 + +;