diff options
author | Hal Finkel <hfinkel@anl.gov> | 2012-10-31 15:17:07 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2012-10-31 15:17:07 +0000 |
commit | 72465ea23d010507d3746adc126d719005981e05 (patch) | |
tree | d5e6b1ad3aad528df1c41d88c82db6a62ba61ca4 /test/Transforms/BBVectorize | |
parent | ef026f1b5e4d52e11c67a1a5ad01eadffcfa4d8e (diff) |
BBVectorize: Choose pair ordering to minimize shuffles
BBVectorize would, except for loads and stores, always fuse instructions
so that the first instruction (in the current source order) would always
represent the low part of the input vectors and the second instruction
would always represent the high part. This led to too many shuffles
being produced because sometimes the opposite order produces fewer of them.
With this change, BBVectorize tracks the kind of pair connections that form
the DAG of candidate pairs, and uses that information to reorder the pairs to
avoid excess shuffles. Using this information, a future commit will be able
to add VTTI-based shuffle costs to the pair selection procedure. Importantly,
the number of remaining shuffles can now be estimated during pair selection.
There are some trivial instruction reorderings in the test cases, and one
simple additional test where we certainly want to do a reordering to
avoid an unnecessary shuffle.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167122 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/Transforms/BBVectorize')
-rw-r--r-- | test/Transforms/BBVectorize/X86/loop1.ll | 2 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/X86/simple.ll | 2 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/cycle.ll | 2 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/loop1.ll | 2 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/search-limit.ll | 2 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/simple-int.ll | 6 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/simple-ldstr.ll | 4 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/simple-sel.ll | 4 | ||||
-rw-r--r-- | test/Transforms/BBVectorize/simple.ll | 43 |
9 files changed, 46 insertions, 21 deletions
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll index 9d5d9fb617..c1be62203b 100644 --- a/test/Transforms/BBVectorize/X86/loop1.ll +++ b/test/Transforms/BBVectorize/X86/loop1.ll @@ -42,8 +42,8 @@ for.body: ; preds = %for.body, %entry ; CHECK: %mul = fmul double %0, %0 ; CHECK: %mul3 = fmul double %0, %1 ; CHECK: %add = fadd double %mul, %mul3 -; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0 ; CHECK: %mul8 = fmul double %1, %1 +; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0 ; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1 ; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2 ; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0 diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll index 6450f821f5..d11c9b92f0 100644 --- a/test/Transforms/BBVectorize/X86/simple.ll +++ b/test/Transforms/BBVectorize/X86/simple.ll @@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll index 32a91ceee0..e8e82ce024 100644 --- a/test/Transforms/BBVectorize/cycle.ll +++ b/test/Transforms/BBVectorize/cycle.ll @@ -107,6 +107,6 @@ done: ret void ; CHECK: @test1 ; CHECK: go: -; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, 
i32 0 +; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0 ; FIXME: When tree pruning is deterministic, include the entire output. } diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll index bebc91ad91..c22ea5852a 100644 --- a/test/Transforms/BBVectorize/loop1.ll +++ b/test/Transforms/BBVectorize/loop1.ll @@ -42,8 +42,8 @@ for.body: ; preds = %for.body, %entry ; CHECK: %mul = fmul double %0, %0 ; CHECK: %mul3 = fmul double %0, %1 ; CHECK: %add = fadd double %mul, %mul3 -; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0 ; CHECK: %mul8 = fmul double %1, %1 +; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0 ; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1 ; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2 ; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0 diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll index d9945b5630..aeaf98865b 100644 --- a/test/Transforms/BBVectorize/search-limit.ll +++ b/test/Transforms/BBVectorize/search-limit.ll @@ -7,8 +7,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK-SL4: @test1 ; CHECK-SL4-NOT: <2 x double> ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll index 6844977143..ae1d63bfd8 100644 --- a/test/Transforms/BBVectorize/simple-int.ll +++ b/test/Transforms/BBVectorize/simple-int.ll @@ -17,8 +17,8 @@ 
define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1, ret double %R ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 ; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0 @@ -43,8 +43,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { ret double %R ; CHECK: @test2 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 ; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1) @@ -68,8 +68,8 @@ define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) { ret double %R ; CHECK: @test3 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 ; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 ; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P) diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll index 
6883e844be..7dd77c933f 100644 --- a/test/Transforms/BBVectorize/simple-ldstr.ll +++ b/test/Transforms/BBVectorize/simple-ldstr.ll @@ -94,13 +94,13 @@ entry: ; CHECK-AO: @test3 ; CHECK-AO: %i0 = load double* %a, align 8 ; CHECK-AO: %i1 = load double* %b, align 8 -; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0 -; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0 ; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1 ; CHECK-AO: %i3 = load double* %arrayidx3, align 8 ; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1 ; CHECK-AO: %i4 = load double* %arrayidx4, align 8 +; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0 ; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1 +; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0 ; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1 ; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2 ; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float> diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll index 325792a5dc..15ecb59702 100644 --- a/test/Transforms/BBVectorize/simple-sel.ll +++ b/test/Transforms/BBVectorize/simple-sel.ll @@ -6,8 +6,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) { ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -33,8 +33,8 @@ define 
double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test2 ; CHECK-NB: @test2 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll index 7cd8133bee..d9a12eebed 100644 --- a/test/Transforms/BBVectorize/simple.ll +++ b/test/Transforms/BBVectorize/simple.ll @@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define double @test1(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test1 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -29,8 +29,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) { define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test2 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -40,12 +40,13 
@@ define double @test2(double %A1, double %A2, double %B1, double %B2) { ; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 %Z1 = fadd double %Y2, %B1 %Z2 = fadd double %Y1, %B2 -; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0> -; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2 +; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0 +; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1 +; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2 %R = fmul double %Z1, %Z2 -; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 -; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 -; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 +; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0 +; CHECK: %Z2.v.r2 = extractelement <2 x double> %Z2, i32 1 +; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1 ret double %R ; CHECK: ret double %R } @@ -54,8 +55,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) { define double @test3(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test3 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -79,8 +80,8 @@ define double @test3(double %A1, double %A2, double %B1, double %B2) { define double @test4(double %A1, double %A2, double %B1, double %B2) { ; CHECK: @test4 ; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 -; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 ; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> 
undef, double %A1, i32 0 ; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 %X1 = fsub double %A1, %B1 %X2 = fsub double %A2, %B2 @@ -148,3 +149,27 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) { ; CHECK: ret <8 x i8> %R } +; Basic depth-3 chain (flipped order) +define double @test7(double %A1, double %A2, double %B1, double %B2) { +; CHECK: @test7 +; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0 +; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1 +; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0 +; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1 + %X1 = fsub double %A1, %B1 + %X2 = fsub double %A2, %B2 +; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2 + %Y1 = fmul double %X1, %A1 + %Y2 = fmul double %X2, %A2 +; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2 + %Z2 = fadd double %Y2, %B2 + %Z1 = fadd double %Y1, %B1 +; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2 + %R = fmul double %Z1, %Z2 +; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0 +; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1 +; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2 + ret double %R +; CHECK: ret double %R +} + |