diff options
author | Alexander Kornienko <alexfh@google.com> | 2013-03-14 10:51:38 +0000 |
---|---|---|
committer | Alexander Kornienko <alexfh@google.com> | 2013-03-14 10:51:38 +0000 |
commit | 647735c781c5b37061ee03d6e9e6c7dda92218e2 (patch) | |
tree | 5a5e56606d41060263048b5a5586b3d2380898ba /test/CodeGen/X86/Stats | |
parent | 6aed25d93d1cfcde5809a73ffa7dc1b0d6396f66 (diff) | |
parent | f635ef401786c84df32090251a8cf45981ecca33 (diff) |
Updating branches/google/stable to r176857
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/google/stable@177040 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/X86/Stats')
31 files changed, 1644 insertions, 0 deletions
diff --git a/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll new file mode 100644 index 0000000000..0af2445d7f --- /dev/null +++ b/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll @@ -0,0 +1,18 @@ +; The old instruction selector used to load all arguments to a call up in +; registers, then start pushing them all onto the stack. This is bad news as +; it makes a ton of annoying overlapping live ranges. This code should not +; cause spills! +; +; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled + +target datalayout = "e-p:32:32" + +define i32 @test(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) { + ret i32 0 +} + +define i32 @main() { + %X = call i32 @test( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=1] + ret i32 %X +} + diff --git a/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll new file mode 100644 index 0000000000..1a3d74918d --- /dev/null +++ b/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -stats 2>&1 | \ +; RUN: grep asm-printer | grep 7 + +define i32 @g(i32 %a, i32 %b) nounwind { + %tmp.1 = shl i32 %b, 1 ; <i32> [#uses=1] + %tmp.3 = add i32 %tmp.1, %a ; <i32> [#uses=1] + %tmp.5 = mul i32 %tmp.3, %a ; <i32> [#uses=1] + %tmp.8 = mul i32 %b, %b ; <i32> [#uses=1] + %tmp.9 = add i32 %tmp.5, %tmp.8 ; <i32> [#uses=1] + ret i32 %tmp.9 +} + diff --git a/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll new file mode 100644 index 0000000000..5cba3efeef --- /dev/null +++ b/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \ +; RUN: not grep "Number of register spills" +; END. + + +define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) { + %tmp44 = load <4 x float>* %a ; <<4 x float>> [#uses=9] + %tmp46 = load <4 x float>* %b ; <<4 x float>> [#uses=1] + %tmp48 = load <4 x float>* %c ; <<4 x float>> [#uses=1] + %tmp50 = load <4 x float>* %d ; <<4 x float>> [#uses=1] + %tmp51 = bitcast <4 x float> %tmp44 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp = shufflevector <4 x i32> %tmp51, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp52 = bitcast <4 x i32> %tmp to <4 x float> ; <<4 x float>> [#uses=1] + %tmp60 = xor <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp61 = bitcast <4 x i32> %tmp60 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp74 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp52, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp75 = bitcast <4 x float> %tmp74 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp89 = bitcast <4 x float> %tmp88 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<4 x i32>> [#uses=1] + %tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp105.upgrd.2, <4 x float>* %a + %tmp108 = bitcast <4 x float> %tmp46 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp109 = shufflevector <4 x i32> %tmp108, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp109.upgrd.3 = bitcast <4 x i32> %tmp109 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp119 = xor <4 x i32> %tmp109, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp120 = bitcast <4 x i32> %tmp119 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp133 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp109.upgrd.3, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp134 = bitcast <4 x float> %tmp133 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp148 = bitcast <4 x float> %tmp147 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<4 x i32>> [#uses=1] + %tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp166.upgrd.4, <4 x float>* %b + %tmp169 = bitcast <4 x float> %tmp48 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp170 = shufflevector <4 x i32> %tmp169, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp170.upgrd.5 = bitcast <4 x i32> %tmp170 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp180 = xor <4 x i32> %tmp170, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp181 = bitcast <4 x i32> %tmp180 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp194 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp170.upgrd.5, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp195 = bitcast <4 x float> %tmp194 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp209 = bitcast <4 x float> %tmp208 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<4 x i32>> [#uses=1] + %tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp227.upgrd.6, <4 x float>* %c + %tmp230 = bitcast <4 x float> %tmp50 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp231 = shufflevector <4 x i32> %tmp230, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2] + %tmp231.upgrd.7 = bitcast <4 x i32> %tmp231 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp241 = xor <4 x i32> %tmp231, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1] + %tmp242 = bitcast <4 x i32> %tmp241 to <4 x float> ; <<4 x float>> [#uses=1] + %tmp255 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp231.upgrd.7, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp256 = bitcast <4 x float> %tmp255 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 ) ; <<4 x float>> [#uses=1] + %tmp270 = bitcast <4 x float> %tmp269 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<4 x i32>> [#uses=1] + %tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float> ; <<4 x float>> [#uses=1] + store <4 x float> %tmp288.upgrd.8, <4 x float>* %d + ret i32 0 +} + +declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) + +declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) diff --git a/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll new file mode 100644 index 0000000000..1c75f93915 --- /dev/null +++ b/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \ +; RUN: grep asm-printer | grep 14 +; +@size20 = external global i32 ; <i32*> [#uses=1] +@in5 = external global i8* ; <i8**> [#uses=1] + +define i32 @compare(i8* %a, i8* %b) nounwind { + %tmp = bitcast i8* %a to i32* ; <i32*> [#uses=1] + %tmp1 = bitcast i8* %b to i32* ; <i32*> [#uses=1] + %tmp.upgrd.1 = load i32* @size20 ; <i32> [#uses=1] + %tmp.upgrd.2 = load i8** @in5 ; <i8*> [#uses=2] + %tmp3 = load i32* %tmp1 ; <i32> [#uses=1] + %gep.upgrd.3 = zext i32 %tmp3 to i64 ; <i64> [#uses=1] + %tmp4 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.3 ; <i8*> [#uses=2] + %tmp7 = load i32* %tmp ; <i32> [#uses=1] + %gep.upgrd.4 = zext i32 %tmp7 to i64 ; <i64> [#uses=1] + %tmp8 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.4 ; <i8*> [#uses=2] + %tmp.upgrd.5 = tail call i32 @memcmp( i8* %tmp8, i8* %tmp4, i32 %tmp.upgrd.1 ) ; <i32> [#uses=1] + ret i32 %tmp.upgrd.5 +} + +declare i32 @memcmp(i8*, i8*, i32) + diff --git a/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll new file mode 100644 index 0000000000..95eefa1e71 --- /dev/null +++ b/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=x86 -stats 2>&1 | \ +; RUN: grep asm-printer | grep 13 + +define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind { +newFuncRoot: + br label %cond_true456.i +bb459.i.exitStub: ; preds = %cond_true456.i + store i32 %tmp449.i, i32* %tmp449.i.out + ret void +cond_true456.i: ; preds = %cond_true456.i, %newFuncRoot + %__s441.2.4.i = phi i8* [ %tmp451.i.upgrd.1, %cond_true456.i ], [ %tmp435.i, %newFuncRoot ] ; <i8*> [#uses=2] + %__h.2.4.i = phi i32 [ %tmp449.i, %cond_true456.i ], [ 0, %newFuncRoot ] ; <i32> [#uses=1] + %tmp446.i = mul i32 %__h.2.4.i, 5 ; <i32> [#uses=1] + %tmp.i = load i8* %__s441.2.4.i ; <i8> [#uses=1] + %tmp448.i = sext i8 %tmp.i to i32 ; <i32> [#uses=1] + %tmp449.i = add i32 %tmp448.i, %tmp446.i ; <i32> [#uses=2] + %tmp450.i = ptrtoint i8* %__s441.2.4.i to i32 ; <i32> [#uses=1] + %tmp451.i = add i32 %tmp450.i, 1 ; <i32> [#uses=1] + %tmp451.i.upgrd.1 = inttoptr i32 %tmp451.i to i8* ; <i8*> [#uses=2] + %tmp45435.i = load i8* %tmp451.i.upgrd.1 ; <i8> [#uses=1] + %tmp45536.i = icmp eq i8 %tmp45435.i, 0 ; <i1> [#uses=1] + br i1 %tmp45536.i, label %bb459.i.exitStub, label %cond_true456.i +} + diff --git a/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll b/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll new file mode 100644 index 0000000000..37c510786a --- /dev/null +++ b/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \ +; RUN: grep "asm-printer" | grep 35 + +target datalayout = "e-p:32:32" +define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { +entry: + %tmp9 = icmp slt i32 %M, 5 ; <i1> [#uses=1] + br i1 %tmp9, label %return, label %cond_true + +cond_true: ; preds = %cond_true, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2] + %tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1] + %tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=2] + %tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4] + %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1] + %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1] + %tmp = load <16 x i8>* %tmp34, align 1 + %tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1] + %tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1] + %tmp54 = bitcast <16 x i8> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2] + %tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1] + %tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <i8*> [#uses=1] + %tmp66 = load <16 x i8>* %tmp65, align 1 + %tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1] + %tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1] + %tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1] + %tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp88 = add <4 x i32> %tmp87, %tmp77 ; <<4 x i32>> [#uses=2] + %tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 ) ; <<4 x i32>> [#uses=1] + %tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64> ; <<2 x i64>> [#uses=2] + %tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 > ; <<2 x i64>> [#uses=1] + %tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1] + %tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1] + %tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1] + %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1] + %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1] + store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7 + %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1] + %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1] + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1] + br i1 %tmp.upgrd.8, label %cond_true, label %return + +return: ; preds = %cond_true, %entry + ret void +} + +declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) diff --git a/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll new file mode 100644 index 0000000000..a1b973d7cc --- /dev/null +++ b/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll @@ -0,0 +1,219 @@ +; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16 +; PR1909 + +@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1] + +define void @minmax(float* %result) nounwind optsize { +entry: + %tmp2 = load float* %result, align 4 ; <float> [#uses=6] + %tmp4 = getelementptr float* %result, i32 2 ; <float*> [#uses=5] + %tmp5 = load float* %tmp4, align 4 ; <float> [#uses=10] + %tmp7 = getelementptr float* %result, i32 4 ; <float*> [#uses=5] + %tmp8 = load float* %tmp7, align 4 ; <float> [#uses=8] + %tmp10 = getelementptr float* %result, i32 6 ; <float*> [#uses=3] + %tmp11 = load float* %tmp10, align 4 ; <float> [#uses=8] + %tmp12 = fcmp olt float %tmp8, %tmp11 ; <i1> [#uses=5] + br i1 %tmp12, label %bb, label %bb21 + +bb: ; preds = %entry + %tmp23469 = fcmp olt float %tmp5, %tmp8 ; <i1> [#uses=1] + br i1 %tmp23469, label %bb26, label %bb30 + +bb21: ; preds = %entry + %tmp23 = fcmp olt float %tmp5, %tmp11 ; <i1> [#uses=1] + br i1 %tmp23, label %bb26, label %bb30 + +bb26: ; preds = %bb21, %bb + %tmp52471 = fcmp olt float %tmp2, %tmp5 ; <i1> [#uses=1] + br i1 %tmp52471, label %bb111, label %bb59 + +bb30: ; preds = %bb21, %bb + br i1 %tmp12, label %bb40, label %bb50 + +bb40: ; preds = %bb30 + %tmp52473 = fcmp olt float %tmp2, %tmp8 ; <i1> [#uses=1] + br i1 %tmp52473, label %bb111, label %bb59 + +bb50: ; preds = %bb30 + %tmp52 = fcmp olt float %tmp2, %tmp11 ; <i1> [#uses=1] + br i1 %tmp52, label %bb111, label %bb59 + +bb59: ; preds = %bb50, %bb40, %bb26 + br i1 %tmp12, label %bb72, label %bb80 + +bb72: ; preds = %bb59 + %tmp82475 = fcmp olt float %tmp5, %tmp8 ; <i1> [#uses=2] + %brmerge786 = or i1 %tmp82475, %tmp12 ; <i1> [#uses=1] + %tmp4.mux787 = select i1 %tmp82475, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge786, label %bb111, label %bb103 + +bb80: ; preds = %bb59 + %tmp82 = fcmp olt float %tmp5, %tmp11 ; <i1> [#uses=2] + %brmerge = or i1 %tmp82, %tmp12 ; <i1> [#uses=1] + %tmp4.mux = select i1 %tmp82, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge, label %bb111, label %bb103 + +bb103: ; preds = %bb80, %bb72 + br label %bb111 + +bb111: ; preds = %bb103, %bb80, %bb72, %bb50, %bb40, %bb26 + %iftmp.0.0.in = phi float* [ %tmp10, %bb103 ], [ %result, %bb26 ], [ %result, %bb40 ], [ %result, %bb50 ], [ %tmp4.mux, %bb80 ], [ %tmp4.mux787, %bb72 ] ; <float*> [#uses=1] + %iftmp.0.0 = load float* %iftmp.0.0.in ; <float> [#uses=1] + %tmp125 = fcmp ogt float %tmp8, %tmp11 ; <i1> [#uses=5] + br i1 %tmp125, label %bb128, label %bb136 + +bb128: ; preds = %bb111 + %tmp138477 = fcmp ogt float %tmp5, %tmp8 ; <i1> [#uses=1] + br i1 %tmp138477, label %bb141, label %bb145 + +bb136: ; preds = %bb111 + %tmp138 = fcmp ogt float %tmp5, %tmp11 ; <i1> [#uses=1] + br i1 %tmp138, label %bb141, label %bb145 + +bb141: ; preds = %bb136, %bb128 + %tmp167479 = fcmp ogt float %tmp2, %tmp5 ; <i1> [#uses=1] + br i1 %tmp167479, label %bb226, label %bb174 + +bb145: ; preds = %bb136, %bb128 + br i1 %tmp125, label %bb155, label %bb165 + +bb155: ; preds = %bb145 + %tmp167481 = fcmp ogt float %tmp2, %tmp8 ; <i1> [#uses=1] + br i1 %tmp167481, label %bb226, label %bb174 + +bb165: ; preds = %bb145 + %tmp167 = fcmp ogt float %tmp2, %tmp11 ; <i1> [#uses=1] + br i1 %tmp167, label %bb226, label %bb174 + +bb174: ; preds = %bb165, %bb155, %bb141 + br i1 %tmp125, label %bb187, label %bb195 + +bb187: ; preds = %bb174 + %tmp197483 = fcmp ogt float %tmp5, %tmp8 ; <i1> [#uses=2] + %brmerge790 = or i1 %tmp197483, %tmp125 ; <i1> [#uses=1] + %tmp4.mux791 = select i1 %tmp197483, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge790, label %bb226, label %bb218 + +bb195: ; preds = %bb174 + %tmp197 = fcmp ogt float %tmp5, %tmp11 ; <i1> [#uses=2] + %brmerge788 = or i1 %tmp197, %tmp125 ; <i1> [#uses=1] + %tmp4.mux789 = select i1 %tmp197, float* %tmp4, float* %tmp7 ; <float*> [#uses=1] + br i1 %brmerge788, label %bb226, label %bb218 + +bb218: ; preds = %bb195, %bb187 + br label %bb226 + +bb226: ; preds = %bb218, %bb195, %bb187, %bb165, %bb155, %bb141 + %iftmp.7.0.in = phi float* [ %tmp10, %bb218 ], [ %result, %bb141 ], [ %result, %bb155 ], [ %result, %bb165 ], [ %tmp4.mux789, %bb195 ], [ %tmp4.mux791, %bb187 ] ; <float*> [#uses=1] + %iftmp.7.0 = load float* %iftmp.7.0.in ; <float> [#uses=1] + %tmp229 = getelementptr float* %result, i32 1 ; <float*> [#uses=7] + %tmp230 = load float* %tmp229, align 4 ; <float> [#uses=6] + %tmp232 = getelementptr float* %result, i32 3 ; <float*> [#uses=5] + %tmp233 = load float* %tmp232, align 4 ; <float> [#uses=10] + %tmp235 = getelementptr float* %result, i32 5 ; <float*> [#uses=5] + %tmp236 = load float* %tmp235, align 4 ; <float> [#uses=8] + %tmp238 = getelementptr float* %result, i32 7 ; <float*> [#uses=3] + %tmp239 = load float* %tmp238, align 4 ; <float> [#uses=8] + %tmp240 = fcmp olt float %tmp236, %tmp239 ; <i1> [#uses=5] + br i1 %tmp240, label %bb243, label %bb251 + +bb243: ; preds = %bb226 + %tmp253485 = fcmp olt float %tmp233, %tmp236 ; <i1> [#uses=1] + br i1 %tmp253485, label %bb256, label %bb260 + +bb251: ; preds = %bb226 + %tmp253 = fcmp olt float %tmp233, %tmp239 ; <i1> [#uses=1] + br i1 %tmp253, label %bb256, label %bb260 + +bb256: ; preds = %bb251, %bb243 + %tmp282487 = fcmp olt float %tmp230, %tmp233 ; <i1> [#uses=1] + br i1 %tmp282487, label %bb341, label %bb289 + +bb260: ; preds = %bb251, %bb243 + br i1 %tmp240, label %bb270, label %bb280 + +bb270: ; preds = %bb260 + %tmp282489 = fcmp olt float %tmp230, %tmp236 ; <i1> [#uses=1] + br i1 %tmp282489, label %bb341, label %bb289 + +bb280: ; preds = %bb260 + %tmp282 = fcmp olt float %tmp230, %tmp239 ; <i1> [#uses=1] + br i1 %tmp282, label %bb341, label %bb289 + +bb289: ; preds = %bb280, %bb270, %bb256 + br i1 %tmp240, label %bb302, label %bb310 + +bb302: ; preds = %bb289 + %tmp312491 = fcmp olt float %tmp233, %tmp236 ; <i1> [#uses=2] + %brmerge793 = or i1 %tmp312491, %tmp240 ; <i1> [#uses=1] + %tmp232.mux794 = select i1 %tmp312491, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge793, label %bb341, label %bb333 + +bb310: ; preds = %bb289 + %tmp312 = fcmp olt float %tmp233, %tmp239 ; <i1> [#uses=2] + %brmerge792 = or i1 %tmp312, %tmp240 ; <i1> [#uses=1] + %tmp232.mux = select i1 %tmp312, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge792, label %bb341, label %bb333 + +bb333: ; preds = %bb310, %bb302 + br label %bb341 + +bb341: ; preds = %bb333, %bb310, %bb302, %bb280, %bb270, %bb256 + %iftmp.14.0.in = phi float* [ %tmp238, %bb333 ], [ %tmp229, %bb280 ], [ %tmp229, %bb270 ], [ %tmp229, %bb256 ], [ %tmp232.mux, %bb310 ], [ %tmp232.mux794, %bb302 ] ; <float*> [#uses=1] + %iftmp.14.0 = load float* %iftmp.14.0.in ; <float> [#uses=1] + %tmp355 = fcmp ogt float %tmp236, %tmp239 ; <i1> [#uses=5] + br i1 %tmp355, label %bb358, label %bb366 + +bb358: ; preds = %bb341 + %tmp368493 = fcmp ogt float %tmp233, %tmp236 ; <i1> [#uses=1] + br i1 %tmp368493, label %bb371, label %bb375 + +bb366: ; preds = %bb341 + %tmp368 = fcmp ogt float %tmp233, %tmp239 ; <i1> [#uses=1] + br i1 %tmp368, label %bb371, label %bb375 + +bb371: ; preds = %bb366, %bb358 + %tmp397495 = fcmp ogt float %tmp230, %tmp233 ; <i1> [#uses=1] + br i1 %tmp397495, label %bb456, label %bb404 + +bb375: ; preds = %bb366, %bb358 + br i1 %tmp355, label %bb385, label %bb395 + +bb385: ; preds = %bb375 + %tmp397497 = fcmp ogt float %tmp230, %tmp236 ; <i1> [#uses=1] + br i1 %tmp397497, label %bb456, label %bb404 + +bb395: ; preds = %bb375 + %tmp397 = fcmp ogt float %tmp230, %tmp239 ; <i1> [#uses=1] + br i1 %tmp397, label %bb456, label %bb404 + +bb404: ; preds = %bb395, %bb385, %bb371 + br i1 %tmp355, label %bb417, label %bb425 + +bb417: ; preds = %bb404 + %tmp427499 = fcmp ogt float %tmp233, %tmp236 ; <i1> [#uses=2] + %brmerge797 = or i1 %tmp427499, %tmp355 ; <i1> [#uses=1] + %tmp232.mux798 = select i1 %tmp427499, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge797, label %bb456, label %bb448 + +bb425: ; preds = %bb404 + %tmp427 = fcmp ogt float %tmp233, %tmp239 ; <i1> [#uses=2] + %brmerge795 = or i1 %tmp427, %tmp355 ; <i1> [#uses=1] + %tmp232.mux796 = select i1 %tmp427, float* %tmp232, float* %tmp235 ; <float*> [#uses=1] + br i1 %brmerge795, label %bb456, label %bb448 + +bb448: ; preds = %bb425, %bb417 + br label %bb456 + +bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371 + %iftmp.21.0.in = phi float* [ %tmp238, %bb448 ], [ %tmp229, %bb395 ], [ %tmp229, %bb385 ], [ %tmp229, %bb371 ], [ %tmp232.mux796, %bb425 ], [ %tmp232.mux798, %bb417 ] ; <float*> [#uses=1] + %iftmp.21.0 = load float* %iftmp.21.0.in ; <float> [#uses=1] + %tmp458459 = fpext float %iftmp.21.0 to double ; <double> [#uses=1] + %tmp460461 = fpext float %iftmp.7.0 to double ; <double> [#uses=1] + %tmp462463 = fpext float %iftmp.14.0 to double ; <double> [#uses=1] + %tmp464465 = fpext float %iftmp.0.0 to double ; <double> [#uses=1] + %tmp467 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0] + ret void +} + +declare i32 @printf(i8*, ...) nounwind diff --git a/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll new file mode 100644 index 0000000000..b2cf34cd20 --- /dev/null +++ b/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s +; Now this test spills one register. But a reload in the loop is cheaper than +; the divsd so it's a win. + +define fastcc void @fourn(double* %data, i32 %isign) nounwind { +; CHECK: fourn +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar93 = phi i32 [ 0, %entry ], [ %idim.030, %bb ] ; <i32> [#uses=2] + %idim.030 = add i32 %indvar93, 1 ; <i32> [#uses=1] + %0 = add i32 %indvar93, 2 ; <i32> [#uses=1] + %1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1] + br i1 %1, label %bb30.loopexit, label %bb + +; CHECK: %bb30.loopexit +; CHECK: divsd %xmm0 +; CHECK: movsd %xmm0, 16(%esp) +; CHECK: %bb3 +bb3: ; preds = %bb30.loopexit, %bb25, %bb3 + %2 = load i32* null, align 4 ; <i32> [#uses=1] + %3 = mul i32 %2, 0 ; <i32> [#uses=1] + %4 = icmp slt i32 0, %3 ; <i1> [#uses=1] + br i1 %4, label %bb18, label %bb3 + +bb18: ; preds = %bb3 + %5 = fdiv double %11, 0.000000e+00 ; <double> [#uses=1] + %6 = tail call double @sin(double %5) nounwind readonly ; <double> [#uses=1] + br label %bb24.preheader + +bb22.preheader: ; preds = %bb24.preheader, %bb22.preheader + br label %bb22.preheader + +bb25: ; preds = %bb24.preheader + %7 = fmul double 0.000000e+00, %6 ; <double> [#uses=0] + %8 = add i32 %i3.122100, 0 ; <i32> [#uses=1] + %9 = icmp sgt i32 %8, 0 ; <i1> [#uses=1] + br i1 %9, label %bb3, label %bb24.preheader + +bb24.preheader: ; preds = %bb25, %bb18 + %i3.122100 = or i32 0, 1 ; <i32> [#uses=2] + %10 = icmp slt i32 0, %i3.122100 ; <i1> [#uses=1] + br i1 %10, label %bb25, label %bb22.preheader + +bb30.loopexit: ; preds = %bb + %11 = fmul double 0.000000e+00, 0x401921FB54442D1C ; <double> [#uses=1] + br label %bb3 +} + +declare double @sin(double) nounwind readonly diff --git a/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll new file mode 100644 index 0000000000..9cbf350940 --- /dev/null +++ b/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted +; rdar://6608609 + +define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { +entry: + %tmp.i2 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1] + %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1] + %0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone ; <<2 x double>> [#uses=1] + %tmp.i = fadd <2 x double> %0, %C ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp.i +} + +declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone diff --git a/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll new file mode 100644 index 0000000000..d50fe6f73a --- /dev/null +++ b/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm" +; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s +; rdar://6627786 +; rdar://7792037 + +target triple = "x86_64-apple-darwin10.0" + %struct.Key = type { i64 } + %struct.__Rec = type opaque + %struct.__vv = type { } + +define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp { +entry: + br label %bb4 + +bb4: ; preds = %bb.i, %bb26, %bb4, %entry +; CHECK: %bb4 +; CHECK: xorb +; CHECK: callq +; CHECK: movq +; CHECK: xorl +; CHECK: xorb + + %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0] + %ins = or i64 %p, 2097152 ; <i64> [#uses=1] + %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1] + %cond = icmp eq i32 %1, 1 ; <i1> [#uses=1] + br i1 %cond, label %bb26, label %bb4 + +bb26: ; preds = %bb4 + %2 = and i64 %ins, 15728640 ; <i64> [#uses=1] + %cond.i = icmp eq i64 %2, 1048576 ; <i1> [#uses=1] + br i1 %cond.i, label %bb.i, label %bb4 + +bb.i: ; preds = %bb26 + %3 = load i32* null, align 4 ; <i32> [#uses=1] + %4 = uitofp i32 %3 to float ; <float> [#uses=1] + %.sum13.i = add i64 0, 4 ; <i64> [#uses=1] + %5 = getelementptr i8* null, i64 %.sum13.i ; <i8*> [#uses=1] + %6 = bitcast i8* %5 to i32* ; <i32*> [#uses=1] + %7 = load i32* %6, align 4 ; <i32> [#uses=1] + %8 = uitofp i32 %7 to float ; <float> [#uses=1] + %.sum.i = add i64 0, 8 ; <i64> [#uses=1] + %9 = getelementptr i8* null, i64 %.sum.i ; <i8*> [#uses=1] + %10 = bitcast i8* %9 to i32* ; <i32*> [#uses=1] + %11 = load i32* %10, align 4 ; <i32> [#uses=1] + %12 = uitofp i32 %11 to float ; <float> [#uses=1] + %13 = insertelement <4 x float> undef, float %4, i32 0 ; <<4 x float>> [#uses=1] + %14 = insertelement <4 x float> %13, float %8, i32 1 ; <<4 x float>> [#uses=1] + %15 = insertelement <4 x float> %14, float %12, i32 2 ; <<4 x float>> [#uses=1] + store <4 x float> %15, <4 x float>* null, align 16 + br label %bb4 +} + +declare i32 @xxGetOffsetForCode(...) + +declare i32 @xxCalculateMidType(...) diff --git a/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll new file mode 100644 index 0000000000..d934ec9a88 --- /dev/null +++ b/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll @@ -0,0 +1,242 @@ +; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t +; RUN: not grep spill %t +; RUN: not grep "%rsp" %t +; RUN: not grep "%rbp" %t + +; The register-pressure scheduler should be able to schedule this in a +; way that does not require spills. + +@X = external global i64 ; <i64*> [#uses=25] + +define fastcc i64 @foo() nounwind { + %tmp = load volatile i64* @X ; <i64> [#uses=7] + %tmp1 = load volatile i64* @X ; <i64> [#uses=5] + %tmp2 = load volatile i64* @X ; <i64> [#uses=3] + %tmp3 = load volatile i64* @X ; <i64> [#uses=1] + %tmp4 = load volatile i64* @X ; <i64> [#uses=5] + %tmp5 = load volatile i64* @X ; <i64> [#uses=3] + %tmp6 = load volatile i64* @X ; <i64> [#uses=2] + %tmp7 = load volatile i64* @X ; <i64> [#uses=1] + %tmp8 = load volatile i64* @X ; <i64> [#uses=1] + %tmp9 = load volatile i64* @X ; <i64> [#uses=1] + %tmp10 = load volatile i64* @X ; <i64> [#uses=1] + %tmp11 = load volatile i64* @X ; <i64> [#uses=1] + %tmp12 = load volatile i64* @X ; <i64> [#uses=1] + %tmp13 = load volatile i64* @X ; <i64> [#uses=1] + %tmp14 = load volatile i64* @X ; <i64> [#uses=1] + %tmp15 = load volatile i64* @X ; <i64> [#uses=1] + %tmp16 = load volatile i64* @X ; <i64> [#uses=1] + %tmp17 = load volatile i64* @X ; <i64> [#uses=1] + %tmp18 = load volatile i64* @X ; <i64> [#uses=1] + %tmp19 = load volatile i64* @X ; <i64> [#uses=1] + %tmp20 = load volatile i64* @X ; <i64> [#uses=1] + %tmp21 = load volatile i64* @X ; <i64> [#uses=1] + %tmp22 = load volatile i64* @X ; <i64> [#uses=1] + %tmp23 = load volatile i64* @X ; <i64> [#uses=1] + %tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8) ; <i64> [#uses=1] + %tmp25 = add i64 %tmp6, %tmp5 ; <i64> [#uses=1] + %tmp26 = add i64 %tmp25, %tmp4 ; <i64> [#uses=1] + %tmp27 = add i64 %tmp7, %tmp4 ; <i64> [#uses=1] + %tmp28 = add i64 %tmp27, %tmp26 ; <i64> [#uses=1] + %tmp29 = add i64 %tmp28, %tmp24 ; <i64> [#uses=2] + %tmp30 = add i64 %tmp2, %tmp1 ; <i64> [#uses=1] + %tmp31 = add i64 %tmp30, %tmp ; <i64> [#uses=1] + %tmp32 = add i64 %tmp2, %tmp1 ; <i64> [#uses=1] + %tmp33 = add i64 %tmp31, %tmp32 ; <i64> [#uses=1] + %tmp34 = add i64 %tmp29, %tmp3 ; <i64> [#uses=5] + %tmp35 = add i64 %tmp33, %tmp ; <i64> [#uses=1] + %tmp36 = add i64 %tmp35, %tmp29 ; <i64> [#uses=7] + %tmp37 = call i64 @llvm.bswap.i64(i64 %tmp9) ; <i64> [#uses=1] + %tmp38 = add i64 %tmp4, %tmp5 ; <i64> [#uses=1] + %tmp39 = add i64 %tmp38, %tmp34 ; <i64> [#uses=1] + %tmp40 = add i64 %tmp6, %tmp37 ; <i64> [#uses=1] + %tmp41 = add i64 %tmp40, %tmp39 ; <i64> [#uses=1] + %tmp42 = add i64 %tmp41, %tmp34 ; <i64> [#uses=2] + %tmp43 = add i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp44 = add i64 %tmp36, %tmp43 ; <i64> [#uses=1] + %tmp45 = add i64 %tmp1, %tmp ; <i64> [#uses=1] + %tmp46 = add i64 %tmp44, %tmp45 ; <i64> [#uses=1] + %tmp47 = add i64 %tmp42, %tmp2 ; <i64> [#uses=5] + %tmp48 = add i64 %tmp36, %tmp46 ; <i64> [#uses=1] + %tmp49 = add i64 %tmp48, %tmp42 ; <i64> [#uses=7] + %tmp50 = call i64 @llvm.bswap.i64(i64 %tmp10) ; <i64> [#uses=1] + %tmp51 = add i64 %tmp34, %tmp4 ; <i64> [#uses=1] + %tmp52 = add i64 %tmp51, %tmp47 ; <i64> [#uses=1] + %tmp53 = add i64 %tmp5, %tmp50 ; <i64> [#uses=1] + %tmp54 = add i64 %tmp53, %tmp52 ; <i64> [#uses=1] + %tmp55 = add i64 %tmp54, %tmp47 ; <i64> [#uses=2] + %tmp56 = add i64 %tmp36, %tmp ; <i64> [#uses=1] + %tmp57 = add i64 %tmp49, %tmp56 ; <i64> [#uses=1] + %tmp58 = add i64 %tmp36, %tmp ; <i64> [#uses=1] + %tmp59 = add i64 %tmp57, %tmp58 ; <i64> [#uses=1] + %tmp60 = add i64 %tmp55, %tmp1 ; <i64> [#uses=5] + %tmp61 = add i64 %tmp49, %tmp59 ; <i64> [#uses=1] + %tmp62 = add i64 %tmp61, %tmp55 ; <i64> [#uses=7] + %tmp63 = call i64 @llvm.bswap.i64(i64 %tmp11) ; <i64> [#uses=1] + %tmp64 = add i64 %tmp47, %tmp34 ; <i64> [#uses=1] + %tmp65 = add i64 %tmp64, %tmp60 ; <i64> [#uses=1] + %tmp66 = add i64 %tmp4, %tmp63 ; <i64> [#uses=1] + %tmp67 = add i64 %tmp66, %tmp65 ; <i64> [#uses=1] + %tmp68 = add i64 %tmp67, %tmp60 ; <i64> [#uses=2] + %tmp69 = add i64 %tmp49, %tmp36 ; <i64> [#uses=1] + %tmp70 = add i64 %tmp62, %tmp69 ; <i64> [#uses=1] + %tmp71 = add i64 %tmp49, %tmp36 ; <i64> [#uses=1] + %tmp72 = add i64 %tmp70, %tmp71 ; <i64> [#uses=1] + %tmp73 = add i64 %tmp68, %tmp ; <i64> [#uses=5] + %tmp74 = add i64 %tmp62, %tmp72 ; <i64> [#uses=1] + %tmp75 = add i64 %tmp74, %tmp68 ; <i64> [#uses=7] + %tmp76 = call i64 @llvm.bswap.i64(i64 %tmp12) ; <i64> [#uses=1] + %tmp77 = add i64 %tmp60, %tmp47 ; <i64> [#uses=1] + %tmp78 = add i64 %tmp77, %tmp73 ; <i64> [#uses=1] + %tmp79 = add i64 %tmp34, %tmp76 ; <i64> [#uses=1] + %tmp80 = add i64 %tmp79, %tmp78 ; <i64> [#uses=1] + %tmp81 = add i64 %tmp80, %tmp73 ; <i64> [#uses=2] + %tmp82 = add i64 %tmp62, %tmp49 ; <i64> [#uses=1] + %tmp83 = add i64 %tmp75, %tmp82 ; <i64> [#uses=1] + %tmp84 = add i64 %tmp62, %tmp49 ; <i64> [#uses=1] + %tmp85 = add i64 %tmp83, %tmp84 ; <i64> [#uses=1] + %tmp86 = add i64 %tmp81, %tmp36 ; <i64> [#uses=5] + %tmp87 = add i64 %tmp75, %tmp85 ; <i64> [#uses=1] + %tmp88 = add i64 %tmp87, %tmp81 ; <i64> [#uses=7] + %tmp89 = call i64 @llvm.bswap.i64(i64 %tmp13) ; <i64> [#uses=1] + %tmp90 = add i64 %tmp73, %tmp60 ; <i64> [#uses=1] + %tmp91 = add i64 %tmp90, %tmp86 ; <i64> [#uses=1] + %tmp92 = add i64 %tmp47, %tmp89 ; <i64> [#uses=1] + %tmp93 = add i64 %tmp92, %tmp91 ; <i64> [#uses=1] + %tmp94 = add i64 %tmp93, %tmp86 ; <i64> [#uses=2] + %tmp95 = add i64 %tmp75, %tmp62 ; <i64> [#uses=1] + %tmp96 = add i64 %tmp88, %tmp95 ; <i64> [#uses=1] + %tmp97 = add i64 %tmp75, %tmp62 ; <i64> [#uses=1] + %tmp98 = add i64 %tmp96, %tmp97 ; <i64> [#uses=1] + %tmp99 = add i64 %tmp94, %tmp49 ; <i64> [#uses=5] + %tmp100 = add i64 %tmp88, %tmp98 ; <i64> [#uses=1] + %tmp101 = add i64 %tmp100, %tmp94 ; <i64> [#uses=7] + %tmp102 = call i64 @llvm.bswap.i64(i64 %tmp14) ; <i64> [#uses=1] + %tmp103 = add i64 %tmp86, %tmp73 ; <i64> [#uses=1] + %tmp104 = add i64 %tmp103, %tmp99 ; <i64> [#uses=1] + %tmp105 = add i64 %tmp102, %tmp60 ; <i64> [#uses=1] + %tmp106 = add i64 %tmp105, %tmp104 ; <i64> [#uses=1] + %tmp107 = add i64 %tmp106, %tmp99 ; <i64> [#uses=2] + %tmp108 = add i64 %tmp88, %tmp75 ; <i64> [#uses=1] + %tmp109 = add i64 %tmp101, %tmp108 ; <i64> [#uses=1] + %tmp110 = add i64 %tmp88, %tmp75 ; <i64> [#uses=1] + %tmp111 = add i64 %tmp109, %tmp110 ; <i64> [#uses=1] + %tmp112 = add i64 %tmp107, %tmp62 ; <i64> [#uses=5] + %tmp113 = add i64 %tmp101, %tmp111 ; <i64> [#uses=1] + %tmp114 = add i64 %tmp113, %tmp107 ; <i64> [#uses=7] + %tmp115 = call i64 @llvm.bswap.i64(i64 %tmp15) ; <i64> [#uses=1] + %tmp116 = add i64 %tmp99, %tmp86 ; <i64> [#uses=1] + %tmp117 = add i64 %tmp116, %tmp112 ; <i64> [#uses=1] + %tmp118 = add i64 %tmp115, %tmp73 ; <i64> [#uses=1] + %tmp119 = add i64 %tmp118, %tmp117 ; <i64> [#uses=1] + %tmp120 = add i64 %tmp119, %tmp112 ; <i64> [#uses=2] + %tmp121 = add i64 %tmp101, %tmp88 ; <i64> [#uses=1] + %tmp122 = add i64 %tmp114, %tmp121 ; <i64> [#uses=1] + %tmp123 = add i64 %tmp101, %tmp88 ; <i64> [#uses=1] + %tmp124 = add i64 %tmp122, %tmp123 ; <i64> [#uses=1] + %tmp125 = add i64 %tmp120, %tmp75 ; <i64> [#uses=5] + %tmp126 = add i64 %tmp114, %tmp124 ; <i64> [#uses=1] + %tmp127 = add i64 %tmp126, %tmp120 ; <i64> [#uses=7] + %tmp128 = call i64 @llvm.bswap.i64(i64 %tmp16) ; <i64> [#uses=1] + %tmp129 = add i64 %tmp112, %tmp99 ; <i64> [#uses=1] + %tmp130 = add i64 %tmp129, %tmp125 ; <i64> [#uses=1] + %tmp131 = add i64 %tmp128, %tmp86 ; <i64> [#uses=1] + %tmp132 = add i64 %tmp131, %tmp130 ; <i64> [#uses=1] + %tmp133 = add i64 %tmp132, %tmp125 ; <i64> [#uses=2] + %tmp134 = add i64 %tmp114, %tmp101 ; <i64> [#uses=1] + %tmp135 = add i64 %tmp127, %tmp134 ; <i64> [#uses=1] + %tmp136 = add i64 %tmp114, %tmp101 ; <i64> [#uses=1] + %tmp137 = add i64 %tmp135, %tmp136 ; <i64> [#uses=1] + %tmp138 = add i64 %tmp133, %tmp88 ; <i64> [#uses=5] + %tmp139 = add i64 %tmp127, %tmp137 ; <i64> [#uses=1] + %tmp140 = add i64 %tmp139, %tmp133 ; <i64> [#uses=7] + %tmp141 = call i64 @llvm.bswap.i64(i64 %tmp17) ; <i64> [#uses=1] + %tmp142 = add i64 %tmp125, %tmp112 ; <i64> [#uses=1] + %tmp143 = add i64 %tmp142, %tmp138 ; <i64> [#uses=1] + %tmp144 = add i64 %tmp141, %tmp99 ; <i64> [#uses=1] + %tmp145 = add i64 %tmp144, %tmp143 ; <i64> [#uses=1] + %tmp146 = add i64 %tmp145, %tmp138 ; <i64> [#uses=2] + %tmp147 = add i64 %tmp127, %tmp114 ; <i64> [#uses=1] + %tmp148 = add i64 %tmp140, %tmp147 ; <i64> [#uses=1] + %tmp149 = add i64 %tmp127, %tmp114 ; <i64> [#uses=1] + %tmp150 = add i64 %tmp148, %tmp149 ; <i64> [#uses=1] + %tmp151 = add i64 %tmp146, %tmp101 ; <i64> [#uses=5] + %tmp152 = add i64 %tmp140, %tmp150 ; <i64> [#uses=1] + %tmp153 = add i64 %tmp152, %tmp146 ; <i64> [#uses=7] + %tmp154 = call i64 @llvm.bswap.i64(i64 %tmp18) ; <i64> [#uses=1] + %tmp155 = add i64 %tmp138, %tmp125 ; <i64> [#uses=1] + %tmp156 = add i64 %tmp155, %tmp151 ; <i64> [#uses=1] + %tmp157 = add i64 %tmp154, %tmp112 ; <i64> [#uses=1] + %tmp158 = add i64 %tmp157, %tmp156 ; <i64> [#uses=1] + %tmp159 = add i64 %tmp158, %tmp151 ; <i64> [#uses=2] + %tmp160 = add i64 %tmp140, %tmp127 ; <i64> [#uses=1] + %tmp161 = add i64 %tmp153, %tmp160 ; <i64> [#uses=1] + %tmp162 = add i64 %tmp140, %tmp127 ; <i64> [#uses=1] + %tmp163 = add i64 %tmp161, %tmp162 ; <i64> [#uses=1] + %tmp164 = add i64 %tmp159, %tmp114 ; <i64> [#uses=5] + %tmp165 = add i64 %tmp153, %tmp163 ; <i64> [#uses=1] + %tmp166 = add i64 %tmp165, %tmp159 ; <i64> [#uses=7] + %tmp167 = call i64 @llvm.bswap.i64(i64 %tmp19) ; <i64> [#uses=1] + %tmp168 = add i64 %tmp151, %tmp138 ; <i64> [#uses=1] + %tmp169 = add i64 %tmp168, %tmp164 ; <i64> [#uses=1] + %tmp170 = add i64 %tmp167, %tmp125 ; <i64> [#uses=1] + %tmp171 = add i64 %tmp170, %tmp169 ; <i64> [#uses=1] + %tmp172 = add i64 %tmp171, %tmp164 ; <i64> [#uses=2] + %tmp173 = add i64 %tmp153, %tmp140 ; <i64> [#uses=1] + %tmp174 = add i64 %tmp166, %tmp173 ; <i64> [#uses=1] + %tmp175 = add i64 %tmp153, %tmp140 ; <i64> [#uses=1] + %tmp176 = add i64 %tmp174, %tmp175 ; <i64> [#uses=1] + %tmp177 = add i64 %tmp172, %tmp127 ; <i64> [#uses=5] + %tmp178 = add i64 %tmp166, %tmp176 ; <i64> [#uses=1] + %tmp179 = add i64 %tmp178, %tmp172 ; <i64> [#uses=6] + %tmp180 = call i64 @llvm.bswap.i64(i64 %tmp20) ; <i64> [#uses=1] + %tmp181 = add i64 %tmp164, %tmp151 ; <i64> [#uses=1] + %tmp182 = add i64 %tmp181, %tmp177 ; <i64> [#uses=1] + %tmp183 = add i64 %tmp180, %tmp138 ; <i64> [#uses=1] + %tmp184 = add i64 %tmp183, %tmp182 ; <i64> [#uses=1] + %tmp185 = add i64 %tmp184, %tmp177 ; <i64> [#uses=2] + %tmp186 = add i64 %tmp166, %tmp153 ; <i64> [#uses=1] + %tmp187 = add i64 %tmp179, %tmp186 ; <i64> [#uses=1] + %tmp188 = add i64 %tmp166, %tmp153 ; <i64> [#uses=1] + %tmp189 = add i64 %tmp187, %tmp188 ; <i64> [#uses=1] + %tmp190 = add i64 %tmp185, %tmp140 ; <i64> [#uses=4] + %tmp191 = add i64 %tmp179, %tmp189 ; <i64> [#uses=1] + %tmp192 = add i64 %tmp191, %tmp185 ; <i64> [#uses=4] + %tmp193 = call i64 @llvm.bswap.i64(i64 %tmp21) ; <i64> [#uses=1] + %tmp194 = add i64 %tmp177, %tmp164 ; <i64> [#uses=1] + %tmp195 = add i64 %tmp194, %tmp190 ; <i64> [#uses=1] + %tmp196 = add i64 %tmp193, %tmp151 ; <i64> [#uses=1] + %tmp197 = add i64 %tmp196, %tmp195 ; <i64> [#uses=1] + %tmp198 = add i64 %tmp197, %tmp190 ; <i64> [#uses=2] + %tmp199 = add i64 %tmp179, %tmp166 ; <i64> [#uses=1] + %tmp200 = add i64 %tmp192, %tmp199 ; <i64> [#uses=1] + %tmp201 = add i64 %tmp179, %tmp166 ; <i64> [#uses=1] + %tmp202 = add i64 %tmp200, %tmp201 ; <i64> [#uses=1] + %tmp203 = add i64 %tmp198, %tmp153 ; <i64> [#uses=3] + %tmp204 = add i64 %tmp192, %tmp202 ; <i64> [#uses=1] + %tmp205 = add i64 %tmp204, %tmp198 ; <i64> [#uses=2] + %tmp206 = call i64 @llvm.bswap.i64(i64 %tmp22) ; <i64> [#uses=1] + %tmp207 = add i64 %tmp190, %tmp177 ; <i64> [#uses=1] + %tmp208 = add i64 %tmp207, %tmp203 ; <i64> [#uses=1] + %tmp209 = add i64 %tmp206, %tmp164 ; <i64> [#uses=1] + %tmp210 = add i64 %tmp209, %tmp208 ; <i64> [#uses=1] + %tmp211 = add i64 %tmp210, %tmp203 ; <i64> [#uses=2] + %tmp212 = add i64 %tmp192, %tmp179 ; <i64> [#uses=1] + %tmp213 = add i64 %tmp205, %tmp212 ; <i64> [#uses=1] + %tmp214 = add i64 %tmp192, %tmp179 ; <i64> [#uses=1] + %tmp215 = add i64 %tmp213, %tmp214 ; <i64> [#uses=1] + %tmp216 = add i64 %tmp211, %tmp166 ; <i64> [#uses=2] + %tmp217 = add i64 %tmp205, %tmp215 ; <i64> [#uses=1] + %tmp218 = add i64 %tmp217, %tmp211 ; <i64> [#uses=1] + %tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23) ; <i64> [#uses=2] + store volatile i64 %tmp219, i64* @X, align 8 + %tmp220 = add i64 %tmp203, %tmp190 ; <i64> [#uses=1] + %tmp221 = add i64 %tmp220, %tmp216 ; <i64> [#uses=1] + %tmp222 = add i64 %tmp219, %tmp177 ; <i64> [#uses=1] + %tmp223 = add i64 %tmp222, %tmp221 ; <i64> [#uses=1] + %tmp224 = add i64 %tmp223, %tmp216 ; <i64> [#uses=1] + %tmp225 = add i64 %tmp224, %tmp218 ; <i64> [#uses=1] + ret i64 %tmp225 +} + +declare i64 @llvm.bswap.i64(i64) nounwind readnone diff --git a/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll new file mode 100644 index 0000000000..ad18a0c5b9 --- /dev/null +++ b/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll @@ -0,0 +1,141 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded" +; XFAIL: * +; 69408 removed the opportunity for this optimization to work + + %struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 } + %struct.anon = type { [16 x i64] } +@K512 = external constant [80 x i64], align 32 ; <[80 x i64]*> [#uses=2] + +define fastcc void @sha512_block_data_order(%struct.SHA512_CTX* nocapture %ctx, i8* nocapture %in, i64 %num) nounwind ssp { +entry: + br label %bb349 + +bb349: ; preds = %bb349, %entry + %e.0489 = phi i64 [ 0, %entry ], [ %e.0, %bb349 ] ; <i64> [#uses=3] + %b.0472 = phi i64 [ 0, %entry ], [ %87, %bb349 ] ; <i64> [#uses=2] + %asmtmp356 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %e.0489) nounwind ; <i64> [#uses=1] + %0 = xor i64 0, %asmtmp356 ; <i64> [#uses=1] + %1 = add i64 0, %0 ; <i64> [#uses=1] + %2 = add i64 %1, 0 ; <i64> [#uses=1] + %3 = add i64 %2, 0 ; <i64> [#uses=1] + %4 = add i64 %3, 0 ; <i64> [#uses=5] + %asmtmp372 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %4) nounwind ; <i64> [#uses=1] + %asmtmp373 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %4) nounwind ; <i64> [#uses=0] + %5 = xor i64 %asmtmp372, 0 ; <i64> [#uses=0] + %6 = xor i64 0, %b.0472 ; <i64> [#uses=1] + %7 = and i64 %4, %6 ; <i64> [#uses=1] + %8 = xor i64 %7, 0 ; <i64> [#uses=1] + %9 = add i64 0, %8 ; <i64> [#uses=1] + %10 = add i64 %9, 0 ; <i64> [#uses=2] + %asmtmp377 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 0) nounwind ; <i64> [#uses=1] + %11 = xor i64 0, %asmtmp377 ; <i64> [#uses=1] + %12 = add i64 0, %11 ; <i64> [#uses=1] + %13 = add i64 %12, 0 ; <i64> [#uses=1] + %not381 = xor i64 0, -1 ; <i64> [#uses=1] + %14 = and i64 %e.0489, %not381 ; <i64> [#uses=1] + %15 = xor i64 0, %14 ; <i64> [#uses=1] + %16 = add i64 %15, 0 ; <i64> [#uses=1] + %17 = add i64 %16, %13 ; <i64> [#uses=1] + %18 = add i64 %17, 0 ; <i64> [#uses=1] + %19 = add i64 %18, 0 ; <i64> [#uses=2] + %20 = add i64 %19, %b.0472 ; <i64> [#uses=3] + %21 = add i64 %19, 0 ; <i64> [#uses=1] + %22 = add i64 %21, 0 ; <i64> [#uses=1] + %23 = add i32 0, 12 ; <i32> [#uses=1] + %24 = and i32 %23, 12 ; <i32> [#uses=1] + %25 = zext i32 %24 to i64 ; <i64> [#uses=1] + %26 = getelementptr [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0] + %27 = add i64 0, %e.0489 ; <i64> [#uses=1] + %28 = add i64 %27, 0 ; <i64> [#uses=1] + %29 = add i64 %28, 0 ; <i64> [#uses=1] + %30 = add i64 %29, 0 ; <i64> [#uses=2] + %31 = and i64 %10, %4 ; <i64> [#uses=1] + %32 = xor i64 0, %31 ; <i64> [#uses=1] + %33 = add i64 %30, 0 ; <i64> [#uses=3] + %34 = add i64 %30, %32 ; <i64> [#uses=1] + %35 = add i64 %34, 0 ; <i64> [#uses=1] + %36 = and i64 %33, %20 ; <i64> [#uses=1] + %37 = xor i64 %36, 0 ; <i64> [#uses=1] + %38 = add i64 %37, 0 ; <i64> [#uses=1] + %39 = add i64 %38, 0 ; <i64> [#uses=1] + %40 = add i64 %39, 0 ; <i64> [#uses=1] + %41 = add i64 %40, 0 ; <i64> [#uses=1] + %42 = add i64 %41, %4 ; <i64> [#uses=3] + %43 = or i32 0, 6 ; <i32> [#uses=1] + %44 = and i32 %43, 14 ; <i32> [#uses=1] + %45 = zext i32 %44 to i64 ; <i64> [#uses=1] + %46 = getelementptr [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1] + %not417 = xor i64 %42, -1 ; <i64> [#uses=1] + %47 = and i64 %20, %not417 ; <i64> [#uses=1] + %48 = xor i64 0, %47 ; <i64> [#uses=1] + %49 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1] + %50 = load i64* %49, align 8 ; <i64> [#uses=1] + %51 = add i64 %48, 0 ; <i64> [#uses=1] + %52 = add i64 %51, 0 ; <i64> [#uses=1] + %53 = add i64 %52, 0 ; <i64> [#uses=1] + %54 = add i64 %53, %50 ; <i64> [#uses=2] + %asmtmp420 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 0) nounwind ; <i64> [#uses=1] + %asmtmp421 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 0) nounwind ; <i64> [#uses=1] + %55 = xor i64 %asmtmp420, 0 ; <i64> [#uses=1] + %56 = xor i64 %55, %asmtmp421 ; <i64> [#uses=1] + %57 = add i64 %54, %10 ; <i64> [#uses=5] + %58 = add i64 %54, 0 ; <i64> [#uses=1] + %59 = add i64 %58, %56 ; <i64> [#uses=2] + %60 = or i32 0, 7 ; <i32> [#uses=1] + %61 = and i32 %60, 15 ; <i32> [#uses=1] + %62 = zext i32 %61 to i64 ; <i64> [#uses=1] + %63 = getelementptr [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2] + %64 = load i64* null, align 8 ; <i64> [#uses=1] + %65 = lshr i64 %64, 6 ; <i64> [#uses=1] + %66 = xor i64 0, %65 ; <i64> [#uses=1] + %67 = xor i64 %66, 0 ; <i64> [#uses=1] + %68 = load i64* %46, align 8 ; <i64> [#uses=1] + %69 = load i64* null, align 8 ; <i64> [#uses=1] + %70 = add i64 %68, 0 ; <i64> [#uses=1] + %71 = add i64 %70, %67 ; <i64> [#uses=1] + %72 = add i64 %71, %69 ; <i64> [#uses=1] + %asmtmp427 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 18, i64 %57) nounwind ; <i64> [#uses=1] + %asmtmp428 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %57) nounwind ; <i64> [#uses=1] + %73 = xor i64 %asmtmp427, 0 ; <i64> [#uses=1] + %74 = xor i64 %73, %asmtmp428 ; <i64> [#uses=1] + %75 = and i64 %57, %42 ; <i64> [#uses=1] + %not429 = xor i64 %57, -1 ; <i64> [#uses=1] + %76 = and i64 %33, %not429 ; <i64> [#uses=1] + %77 = xor i64 %75, %76 ; <i64> [#uses=1] + %78 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1] + %79 = load i64* %78, align 16 ; <i64> [#uses=1] + %80 = add i64 %77, %20 ; <i64> [#uses=1] + %81 = add i64 %80, %72 ; <i64> [#uses=1] + %82 = add i64 %81, %74 ; <i64> [#uses=1] + %83 = add i64 %82, %79 ; <i64> [#uses=1] + %asmtmp432 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %59) nounwind ; <i64> [#uses=1] + %asmtmp433 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %59) nounwind ; <i64> [#uses=1] + %84 = xor i64 %asmtmp432, 0 ; <i64> [#uses=1] + %85 = xor i64 %84, %asmtmp433 ; <i64> [#uses=1] + %86 = add i64 %83, %22 ; <i64> [#uses=2] + %87 = add i64 0, %85 ; <i64> [#uses=1] + %asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind ; <i64> [#uses=1] + %88 = xor i64 0, %asmtmp435 ; <i64> [#uses=1] + %89 = load i64* null, align 8 ; <i64> [#uses=3] + %asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind ; <i64> [#uses=1] + %asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind ; <i64> [#uses=1] + %90 = lshr i64 %89, 6 ; <i64> [#uses=1] + %91 = xor i64 %asmtmp436, %90 ; <i64> [#uses=1] + %92 = xor i64 %91, %asmtmp437 ; <i64> [#uses=1] + %93 = load i64* %63, align 8 ; <i64> [#uses=1] + %94 = load i64* null, align 8 ; <i64> [#uses=1] + %95 = add i64 %93, %88 ; <i64> [#uses=1] + %96 = add i64 %95, %92 ; <i64> [#uses=1] + %97 = add i64 %96, %94 ; <i64> [#uses=2] + store i64 %97, i64* %63, align 8 + %98 = and i64 %86, %57 ; <i64> [#uses=1] + %not441 = xor i64 %86, -1 ; <i64> [#uses=1] + %99 = and i64 %42, %not441 ; <i64> [#uses=1] + %100 = xor i64 %98, %99 ; <i64> [#uses=1] + %101 = add i64 %100, %33 ; <i64> [#uses=1] + %102 = add i64 %101, %97 ; <i64> [#uses=1] + %103 = add i64 %102, 0 ; <i64> [#uses=1] + %104 = add i64 %103, 0 ; <i64> [#uses=1] + %e.0 = add i64 %104, %35 ; <i64> [#uses=1] + br label %bb349 +} diff --git a/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll new file mode 100644 index 0000000000..eb4a5c04a2 --- /dev/null +++ b/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt + +define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp { +entry: + switch i8 undef, label %bb6 [ + i8 9, label %bb5 + i8 32, label %bb5 + i8 10, label %bb5 + i8 13, label %bb5 + i8 12, label %bb5 + ] + +bb5: ; preds = %entry, %entry, %entry, %entry, %entry + br label %bb6 + +bb6: ; preds = %bb5, %entry + br i1 undef, label %bb7, label %bb9 + +bb7: ; preds = %bb6 + unreachable + +bb9: ; preds = %bb6 + %0 = load i8* undef, align 1 ; <i8> [#uses=3] + br i1 undef, label %bb12, label %bb10 + +bb10: ; preds = %bb9 + br i1 undef, label %bb12, label %bb11 + +bb11: ; preds = %bb10 + unreachable + +bb12: ; preds = %bb10, %bb9 + br i1 undef, label %bb13, label %bb14 + +bb13: ; preds = %bb12 + store i8 %0, i8* undef, align 1 + %1 = zext i8 %0 to i32 ; <i32> [#uses=1] + br label %bb18 + +bb14: ; preds = %bb12 + br label %bb18 + +bb18: ; preds = %bb14, %bb13 + %termcode.0 = phi i32 [ %1, %bb13 ], [ undef, %bb14 ] ; <i32> [#uses=2] + %2 = icmp eq i8 %0, 0 ; <i1> [#uses=1] + br i1 %2, label %bb21, label %bb19 + +bb19: ; preds = %bb18 + br i1 undef, label %bb21, label %bb20 + +bb20: ; preds = %bb19 + br label %bb21 + +bb21: ; preds = %bb20, %bb19, %bb18 + %termcode.1 = phi i32 [ %termcode.0, %bb18 ], [ %termcode.0, %bb19 ], [ undef, %bb20 ] ; <i32> [#uses=0] + unreachable +} diff --git a/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll new file mode 100644 index 0000000000..47ef693cc2 --- /dev/null +++ b/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s +; +; This test should not cause any spilling with RAFast. +; +; CHECK: Number of copies coalesced +; CHECK-NOT: Number of stores added +; +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%0 = type { i64, i64, i8*, i8* } +%1 = type opaque +%2 = type opaque +%3 = type <{ i8*, i32, i32, void (%4*)*, i8*, i64 }> +%4 = type { i8**, i32, i32, i8**, %5*, i64 } +%5 = type { i64, i64 } +%6 = type { i8*, i32, i32, i8*, %5* } + +@0 = external hidden constant %0 + +define hidden void @f() ssp { +bb: + %tmp5 = alloca i64, align 8 + %tmp6 = alloca void ()*, align 8 + %tmp7 = alloca %3, align 8 + store i64 0, i64* %tmp5, align 8 + br label %bb8 + +bb8: ; preds = %bb23, %bb + %tmp15 = getelementptr inbounds %3* %tmp7, i32 0, i32 4 + store i8* bitcast (%0* @0 to i8*), i8** %tmp15 + %tmp16 = bitcast %3* %tmp7 to void ()* + store void ()* %tmp16, void ()** %tmp6, align 8 + %tmp17 = load void ()** %tmp6, align 8 + %tmp18 = bitcast void ()* %tmp17 to %6* + %tmp19 = getelementptr inbounds %6* %tmp18, i32 0, i32 3 + %tmp20 = bitcast %6* %tmp18 to i8* + %tmp21 = load i8** %tmp19 + %tmp22 = bitcast i8* %tmp21 to void (i8*)* + call void %tmp22(i8* %tmp20) + br label %bb23 + +bb23: ; preds = %bb8 + %tmp24 = load i64* %tmp5, align 8 + %tmp25 = add i64 %tmp24, 1 + store i64 %tmp25, i64* %tmp5, align 8 + %tmp26 = icmp ult i64 %tmp25, 10 + br i1 %tmp26, label %bb8, label %bb27 + +bb27: ; preds = %bb23 + ret void +} diff --git a/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll new file mode 100644 index 0000000000..18a3313773 --- /dev/null +++ b/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \ +; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc" + +; rdar://11095580 + +%struct.ref_s = type { %union.color_sample, i16, i16 } +%union.color_sample = type { i64 } + +@table = external global [3891 x i64] + +declare i32 @foo() + +define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp { +entry: + %call = tail call i32 @foo() + %tmp = ashr i32 %call, 31 + %0 = and i32 %tmp, 1396 + %index9 = add i32 %0, 2397 + indirectbr i8* undef, [label %return, label %if.end] + +if.end: ; preds = %entry + %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2 + %tmp6 = load i16* %size5, align 2 + %tobool1 = icmp eq i16 %tmp6, 0 + %1 = select i1 %tobool1, i32 1396, i32 -1910 + %index10 = add i32 %index9, %1 + indirectbr i8* undef, [label %return, label %while.body.lr.ph] + +while.body.lr.ph: ; preds = %if.end + %refs = bitcast %struct.ref_s* %op to %struct.ref_s** + %tmp9 = load %struct.ref_s** %refs, align 8 + %tmp4 = zext i16 %tmp6 to i64 + %index13 = add i32 %index10, 1658 + %2 = sext i32 %index13 to i64 + %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2 + %blockaddress14 = load i64* %3, align 8 + %4 = inttoptr i64 %blockaddress14 to i8* + indirectbr i8* %4, [label %while.body] + +while.body: ; preds = %while.body, %while.body.lr.ph + %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ] + %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ] + %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1 + store i16 32, i16* %type_attrs, align 2 + %indvar.next = add i64 %indvar, 1 + %exitcond5 = icmp eq i64 %indvar.next, %tmp4 + %tmp7 = select i1 %exitcond5, i32 1648, i32 0 + %index15 = add i32 %index7, %tmp7 + %tmp8 = select i1 %exitcond5, i64 13, i64 0 + %5 = sext i32 %index15 to i64 + %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5 + %blockaddress16 = load i64* %6, align 8 + %7 = inttoptr i64 %blockaddress16 to i8* + indirectbr i8* %7, [label %return, label %while.body] + +return: ; preds = %while.body, %if.end, %entry + %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll b/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll new file mode 100644 index 0000000000..33141680aa --- /dev/null +++ b/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -mtriple=x86_64-appel-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink" + +define fastcc void @t() nounwind ssp { +entry: + br i1 undef, label %bb, label %bb4 + +bb: ; preds = %entry + br i1 undef, label %return, label %bb3 + +bb3: ; preds = %bb + unreachable + +bb4: ; preds = %entry + br i1 undef, label %bb.nph, label %return + +bb.nph: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb9, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1] + %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2] + %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0] + br i1 undef, label %bb9, label %bb6 + +bb6: ; preds = %bb5 + br i1 undef, label %bb9, label %bb7 + +bb7: ; preds = %bb6 + br i1 undef, label %bb9, label %bb8 + +bb8: ; preds = %bb7 + unreachable + +bb9: ; preds = %bb7, %bb6, %bb5 + br i1 undef, label %bb5, label %return + +return: ; preds = %bb9, %bb4, %bb + ret void +} diff --git a/test/CodeGen/X86/Stats/constant-pool-remat-0.ll b/test/CodeGen/X86/Stats/constant-pool-remat-0.ll new file mode 100644 index 0000000000..4be14d2128 --- /dev/null +++ b/test/CodeGen/X86/Stats/constant-pool-remat-0.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; CHECK: LCPI +; CHECK: LCPI +; CHECK: LCPI +; CHECK-NOT: LCPI + +; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X64stat +; X64stat: 6 asm-printer + +; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X32stat +; X32stat: 12 asm-printer + +declare float @qux(float %y) + +define float @array(float %a) nounwind { + %n = fmul float %a, 9.0 + %m = call float @qux(float %n) + %o = fmul float %m, 9.0 + ret float %o +} diff --git a/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll new file mode 100644 index 0000000000..064ee364d1 --- /dev/null +++ b/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS +; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS +; STATS: 9 asm-printer + +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK: leal 1({{%rsi|%rdx}}), + +define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize { +entry: + %0 = add i32 %i2, 1 ; <i32> [#uses=1] + %1 = sext i32 %0 to i64 ; <i64> [#uses=1] + %2 = getelementptr i8* %ptr, i64 %1 ; <i8*> [#uses=1] + %3 = load i8* %2, align 1 ; <i8> [#uses=1] + %4 = icmp eq i8 0, %3 ; <i1> [#uses=1] + br i1 %4, label %bb3, label %bb34 + +bb3: ; preds = %entry + %5 = add i32 %i2, 4 ; <i32> [#uses=0] + %6 = trunc i32 %5 to i8 + ret i8 %6 + +bb34: ; preds = %entry + ret i8 0 +} + diff --git a/test/CodeGen/X86/Stats/dagcombine-cse.ll b/test/CodeGen/X86/Stats/dagcombine-cse.ll new file mode 100644 index 0000000000..af69531246 --- /dev/null +++ b/test/CodeGen/X86/Stats/dagcombine-cse.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14 + +define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { +entry: + %tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2] + %tmp9 = add i32 %tmp7, %idxX ; <i32> [#uses=1] + %tmp11 = getelementptr i8* %ref_frame_ptr, i32 %tmp9 ; <i8*> [#uses=1] + %tmp1112 = bitcast i8* %tmp11 to i32* ; <i32*> [#uses=1] + %tmp13 = load i32* %tmp1112, align 4 ; <i32> [#uses=1] + %tmp18 = add i32 %idxX, 4 ; <i32> [#uses=1] + %tmp20.sum = add i32 %tmp18, %tmp7 ; <i32> [#uses=1] + %tmp21 = getelementptr i8* %ref_frame_ptr, i32 %tmp20.sum ; <i8*> [#uses=1] + %tmp2122 = bitcast i8* %tmp21 to i16* ; <i16*> [#uses=1] + %tmp23 = load i16* %tmp2122, align 2 ; <i16> [#uses=1] + %tmp2425 = zext i16 %tmp23 to i64 ; <i64> [#uses=1] + %tmp26 = shl i64 %tmp2425, 32 ; <i64> [#uses=1] + %tmp2728 = zext i32 %tmp13 to i64 ; <i64> [#uses=1] + %tmp29 = or i64 %tmp26, %tmp2728 ; <i64> [#uses=1] + %tmp3454 = bitcast i64 %tmp29 to double ; <double> [#uses=1] + %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0 ; <<2 x double>> [#uses=1] + %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1] + %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] + %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32> ; <<4 x i32>> [#uses=1] + %tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; <i32> [#uses=1] + ret i32 %tmp48 +} diff --git a/test/CodeGen/X86/Stats/hoist-invariant-load.ll b/test/CodeGen/X86/Stats/hoist-invariant-load.ll new file mode 100644 index 0000000000..74ecd045b3 --- /dev/null +++ b/test/CodeGen/X86/Stats/hoist-invariant-load.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm" + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.7.2" + +@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1 +@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip" +@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip" +@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata" + +define void @test(i8* %x) uwtable ssp { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0 + %call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0) + %inc = add i32 %i.01, 1 + %exitcond = icmp eq i32 %inc, 10000 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind + +!0 = metadata !{} diff --git a/test/CodeGen/X86/Stats/licm-nested.ll b/test/CodeGen/X86/Stats/licm-nested.ll new file mode 100644 index 0000000000..c3f991d7a9 --- /dev/null +++ b/test/CodeGen/X86/Stats/licm-nested.ll @@ -0,0 +1,89 @@ +; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3 + +; MachineLICM should be able to hoist the symbolic addresses out of +; the inner loops. + +@main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3] +@.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1] + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + %cmp = icmp eq i32 %argc, 2 ; <i1> [#uses=1] + br i1 %cmp, label %while.cond.preheader, label %bb.nph53 + +while.cond.preheader: ; preds = %entry + %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1] + %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1] + %call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2] + %tobool51 = icmp eq i32 %call, 0 ; <i1> [#uses=1] + br i1 %tobool51, label %while.end, label %bb.nph53 + +while.cond.loopexit: ; preds = %for.inc35 + %indvar.next77 = add i32 %indvar76, 1 ; <i32> [#uses=2] + %exitcond78 = icmp eq i32 %indvar.next77, %NUM.0.ph80 ; <i1> [#uses=1] + br i1 %exitcond78, label %while.end, label %bb.nph + +bb.nph53: ; preds = %entry, %while.cond.preheader + %NUM.0.ph80 = phi i32 [ %call, %while.cond.preheader ], [ 17000, %entry ] ; <i32> [#uses=1] + br label %bb.nph + +bb.nph: ; preds = %while.cond.loopexit, %bb.nph53 + %indvar76 = phi i32 [ 0, %bb.nph53 ], [ %indvar.next77, %while.cond.loopexit ] ; <i32> [#uses=1] + br label %for.body + +for.body: ; preds = %for.body, %bb.nph + %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; <i64> [#uses=2] + %tmp = add i64 %indvar, 2 ; <i64> [#uses=1] + %arrayidx10 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1] + store i8 1, i8* %arrayidx10 + %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 8191 ; <i1> [#uses=1] + br i1 %exitcond, label %for.body15, label %for.body + +for.body15: ; preds = %for.body, %for.inc35 + %indvar57 = phi i64 [ %indvar.next58, %for.inc35 ], [ 0, %for.body ] ; <i64> [#uses=4] + %count.248 = phi i32 [ %count.1, %for.inc35 ], [ 0, %for.body ] ; <i32> [#uses=2] + %tmp68 = add i64 %indvar57, 2 ; <i64> [#uses=2] + %tmp70 = mul i64 %indvar57, 3 ; <i64> [#uses=1] + %tmp71 = add i64 %tmp70, 6 ; <i64> [#uses=1] + %tmp73 = shl i64 %indvar57, 1 ; <i64> [#uses=1] + %add = add i64 %tmp73, 4 ; <i64> [#uses=2] + %arrayidx17 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1] + %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1] + %tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1] + br i1 %tobool19, label %for.inc35, label %if.then + +if.then: ; preds = %for.body15 + %cmp2443 = icmp slt i64 %add, 8193 ; <i1> [#uses=1] + br i1 %cmp2443, label %for.body25, label %for.end32 + +for.body25: ; preds = %if.then, %for.body25 + %indvar55 = phi i64 [ %indvar.next56, %for.body25 ], [ 0, %if.then ] ; <i64> [#uses=2] + %tmp60 = mul i64 %tmp68, %indvar55 ; <i64> [#uses=2] + %tmp75 = add i64 %add, %tmp60 ; <i64> [#uses=1] + %arrayidx27 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1] + store i8 0, i8* %arrayidx27 + %add31 = add i64 %tmp71, %tmp60 ; <i64> [#uses=1] + %cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1] + %indvar.next56 = add i64 %indvar55, 1 ; <i64> [#uses=1] + br i1 %cmp24, label %for.body25, label %for.end32 + +for.end32: ; preds = %for.body25, %if.then + %inc34 = add nsw i32 %count.248, 1 ; <i32> [#uses=1] + br label %for.inc35 + +for.inc35: ; preds = %for.body15, %for.end32 + %count.1 = phi i32 [ %inc34, %for.end32 ], [ %count.248, %for.body15 ] ; <i32> [#uses=2] + %indvar.next58 = add i64 %indvar57, 1 ; <i64> [#uses=2] + %exitcond67 = icmp eq i64 %indvar.next58, 8191 ; <i1> [#uses=1] + br i1 %exitcond67, label %while.cond.loopexit, label %for.body15 + +while.end: ; preds = %while.cond.loopexit, %while.cond.preheader + %count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, %while.cond.loopexit ] ; <i32> [#uses=1] + %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0] + ret i32 0 +} + +declare i32 @atoi(i8* nocapture) nounwind readonly + +declare i32 @printf(i8* nocapture, ...) nounwind diff --git a/test/CodeGen/X86/Stats/lit.local.cfg b/test/CodeGen/X86/Stats/lit.local.cfg new file mode 100644 index 0000000000..1a5fd5ec86 --- /dev/null +++ b/test/CodeGen/X86/Stats/lit.local.cfg @@ -0,0 +1,8 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'X86' in targets: + config.unsupported = True + +if not config.root.enable_assertions: + config.unsupported = True diff --git a/test/CodeGen/X86/Stats/phi-immediate-factoring.ll b/test/CodeGen/X86/Stats/phi-immediate-factoring.ll new file mode 100644 index 0000000000..476bb10998 --- /dev/null +++ b/test/CodeGen/X86/Stats/phi-immediate-factoring.ll @@ -0,0 +1,54 @@ +; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6 +; PR1296 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "i686-apple-darwin8" + +define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind { +entry: + switch i32 %A, label %out [ + i32 1, label %bb + i32 0, label %bb13 + i32 2, label %bb35 + ] + +bb: ; preds = %cond_next, %entry + %i.144.1 = phi i32 [ 0, %entry ], [ %tmp7, %cond_next ] ; <i32> [#uses=2] + %tmp4 = and i32 %i.144.1, %B ; <i32> [#uses=1] + icmp eq i32 %tmp4, 0 ; <i1>:0 [#uses=1] + br i1 %0, label %cond_next, label %out + +cond_next: ; preds = %bb + %tmp7 = add i32 %i.144.1, 1 ; <i32> [#uses=2] + icmp slt i32 %tmp7, 1000 ; <i1>:1 [#uses=1] + br i1 %1, label %bb, label %out + +bb13: ; preds = %cond_next18, %entry + %i.248.1 = phi i32 [ 0, %entry ], [ %tmp20, %cond_next18 ] ; <i32> [#uses=2] + %tmp16 = and i32 %i.248.1, %C ; <i32> [#uses=1] + icmp eq i32 %tmp16, 0 ; <i1>:2 [#uses=1] + br i1 %2, label %cond_next18, label %out + +cond_next18: ; preds = %bb13 + %tmp20 = add i32 %i.248.1, 1 ; <i32> [#uses=2] + icmp slt i32 %tmp20, 1000 ; <i1>:3 [#uses=1] + br i1 %3, label %bb13, label %out + +bb27: ; preds = %bb35 + %tmp30 = and i32 %i.3, %C ; <i32> [#uses=1] + icmp eq i32 %tmp30, 0 ; <i1>:4 [#uses=1] + br i1 %4, label %cond_next32, label %out + +cond_next32: ; preds = %bb27 + %indvar.next = add i32 %i.3, 1 ; <i32> [#uses=1] + br label %bb35 + +bb35: ; preds = %entry, %cond_next32 + %i.3 = phi i32 [ %indvar.next, %cond_next32 ], [ 0, %entry ] ; <i32> [#uses=3] + icmp slt i32 %i.3, 1000 ; <i1>:5 [#uses=1] + br i1 %5, label %bb27, label %out + +out: ; preds = %bb27, %bb35, %bb13, %cond_next18, %bb, %cond_next, %entry + %result.0 = phi i32 [ 0, %entry ], [ 1, %bb ], [ 0, %cond_next ], [ 1, %bb13 ], [ 0, %cond_next18 ], [ 1, %bb27 ], [ 0, %bb35 ] ; <i32> [#uses=1] + ret i32 %result.0 +} diff --git a/test/CodeGen/X86/Stats/pr3522.ll b/test/CodeGen/X86/Stats/pr3522.ll new file mode 100644 index 0000000000..d8f37781fc --- /dev/null +++ b/test/CodeGen/X86/Stats/pr3522.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk" +; PR3522 + +target triple = "i386-pc-linux-gnu" +@.str = external constant [13 x i8] ; <[13 x i8]*> [#uses=1] + +define void @_ada_c34018a() { +entry: + %0 = tail call i32 @report__ident_int(i32 90) ; <i32> [#uses=1] + %1 = trunc i32 %0 to i8 ; <i8> [#uses=1] + invoke void @__gnat_rcheck_12(i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 32) noreturn + to label %invcont unwind label %lpad + +invcont: ; preds = %entry + unreachable + +bb22: ; preds = %lpad + ret void + +return: ; preds = %lpad + ret void + +lpad: ; preds = %entry + %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0 + cleanup + %2 = icmp eq i8 %1, 90 ; <i1> [#uses=1] + br i1 %2, label %return, label %bb22 +} + +declare void @__gnat_rcheck_12(i8*, i32) noreturn + +declare i32 @report__ident_int(i32) + +declare i32 @__gxx_personality_v0(...) diff --git a/test/CodeGen/X86/Stats/regpressure.ll b/test/CodeGen/X86/Stats/regpressure.ll new file mode 100644 index 0000000000..52d7b56f18 --- /dev/null +++ b/test/CodeGen/X86/Stats/regpressure.ll @@ -0,0 +1,114 @@ +;; Both functions in this testcase should codegen to the same function, and +;; neither of them should require spilling anything to the stack. + +; RUN: llc < %s -march=x86 -stats 2>&1 | \ +; RUN: not grep "Number of register spills" + +;; This can be compiled to use three registers if the loads are not +;; folded into the multiplies, 2 registers otherwise. + +define i32 @regpressure1(i32* %P) { + %A = load i32* %P ; <i32> [#uses=1] + %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1] + %B = load i32* %Bp ; <i32> [#uses=1] + %s1 = mul i32 %A, %B ; <i32> [#uses=1] + %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1] + %C = load i32* %Cp ; <i32> [#uses=1] + %s2 = mul i32 %s1, %C ; <i32> [#uses=1] + %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1] + %D = load i32* %Dp ; <i32> [#uses=1] + %s3 = mul i32 %s2, %D ; <i32> [#uses=1] + %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1] + %E = load i32* %Ep ; <i32> [#uses=1] + %s4 = mul i32 %s3, %E ; <i32> [#uses=1] + %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1] + %F = load i32* %Fp ; <i32> [#uses=1] + %s5 = mul i32 %s4, %F ; <i32> [#uses=1] + %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1] + %G = load i32* %Gp ; <i32> [#uses=1] + %s6 = mul i32 %s5, %G ; <i32> [#uses=1] + %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1] + %H = load i32* %Hp ; <i32> [#uses=1] + %s7 = mul i32 %s6, %H ; <i32> [#uses=1] + %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1] + %I = load i32* %Ip ; <i32> [#uses=1] + %s8 = mul i32 %s7, %I ; <i32> [#uses=1] + %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1] + %J = load i32* %Jp ; <i32> [#uses=1] + %s9 = mul i32 %s8, %J ; <i32> [#uses=1] + ret i32 %s9 +} + +define i32 @regpressure2(i32* %P) { + %A = load i32* %P ; <i32> [#uses=1] + %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1] + %B = load i32* %Bp ; <i32> [#uses=1] + %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1] + %C = load i32* %Cp ; <i32> [#uses=1] + %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1] + %D = load i32* %Dp ; <i32> [#uses=1] + %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1] + %E = load i32* %Ep ; <i32> [#uses=1] + %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1] + %F = load i32* %Fp ; <i32> [#uses=1] + %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1] + %G = load i32* %Gp ; <i32> [#uses=1] + %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1] + %H = load i32* %Hp ; <i32> [#uses=1] + %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1] + %I = load i32* %Ip ; <i32> [#uses=1] + %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1] + %J = load i32* %Jp ; <i32> [#uses=1] + %s1 = mul i32 %A, %B ; <i32> [#uses=1] + %s2 = mul i32 %s1, %C ; <i32> [#uses=1] + %s3 = mul i32 %s2, %D ; <i32> [#uses=1] + %s4 = mul i32 %s3, %E ; <i32> [#uses=1] + %s5 = mul i32 %s4, %F ; <i32> [#uses=1] + %s6 = mul i32 %s5, %G ; <i32> [#uses=1] + %s7 = mul i32 %s6, %H ; <i32> [#uses=1] + %s8 = mul i32 %s7, %I ; <i32> [#uses=1] + %s9 = mul i32 %s8, %J ; <i32> [#uses=1] + ret i32 %s9 +} + +define i32 @regpressure3(i16* %P, i1 %Cond, i32* %Other) { + %A = load i16* %P ; <i16> [#uses=1] + %Bp = getelementptr i16* %P, i32 1 ; <i16*> [#uses=1] + %B = load i16* %Bp ; <i16> [#uses=1] + %Cp = getelementptr i16* %P, i32 2 ; <i16*> [#uses=1] + %C = load i16* %Cp ; <i16> [#uses=1] + %Dp = getelementptr i16* %P, i32 3 ; <i16*> [#uses=1] + %D = load i16* %Dp ; <i16> [#uses=1] + %Ep = getelementptr i16* %P, i32 4 ; <i16*> [#uses=1] + %E = load i16* %Ep ; <i16> [#uses=1] + %Fp = getelementptr i16* %P, i32 5 ; <i16*> [#uses=1] + %F = load i16* %Fp ; <i16> [#uses=1] + %Gp = getelementptr i16* %P, i32 6 ; <i16*> [#uses=1] + %G = load i16* %Gp ; <i16> [#uses=1] + %Hp = getelementptr i16* %P, i32 7 ; <i16*> [#uses=1] + %H = load i16* %Hp ; <i16> [#uses=1] + %Ip = getelementptr i16* %P, i32 8 ; <i16*> [#uses=1] + %I = load i16* %Ip ; <i16> [#uses=1] + %Jp = getelementptr i16* %P, i32 9 ; <i16*> [#uses=1] + %J = load i16* %Jp ; <i16> [#uses=1] + %A.upgrd.1 = sext i16 %A to i32 ; <i32> [#uses=1] + %B.upgrd.2 = sext i16 %B to i32 ; <i32> [#uses=1] + %D.upgrd.3 = sext i16 %D to i32 ; <i32> [#uses=1] + %C.upgrd.4 = sext i16 %C to i32 ; <i32> [#uses=1] + %E.upgrd.5 = sext i16 %E to i32 ; <i32> [#uses=1] + %F.upgrd.6 = sext i16 %F to i32 ; <i32> [#uses=1] + %G.upgrd.7 = sext i16 %G to i32 ; <i32> [#uses=1] + %H.upgrd.8 = sext i16 %H to i32 ; <i32> [#uses=1] + %I.upgrd.9 = sext i16 %I to i32 ; <i32> [#uses=1] + %J.upgrd.10 = sext i16 %J to i32 ; <i32> [#uses=1] + %s1 = add i32 %A.upgrd.1, %B.upgrd.2 ; <i32> [#uses=1] + %s2 = add i32 %C.upgrd.4, %s1 ; <i32> [#uses=1] + %s3 = add i32 %D.upgrd.3, %s2 ; <i32> [#uses=1] + %s4 = add i32 %E.upgrd.5, %s3 ; <i32> [#uses=1] + %s5 = add i32 %F.upgrd.6, %s4 ; <i32> [#uses=1] + %s6 = add i32 %G.upgrd.7, %s5 ; <i32> [#uses=1] + %s7 = add i32 %H.upgrd.8, %s6 ; <i32> [#uses=1] + %s8 = add i32 %I.upgrd.9, %s7 ; <i32> [#uses=1] + %s9 = add i32 %J.upgrd.10, %s8 ; <i32> [#uses=1] + ret i32 %s9 +} diff --git a/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll b/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll new file mode 100644 index 0000000000..af6d47af7a --- /dev/null +++ b/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \ +; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted" +; rdar://6480363 + +target triple = "i386-apple-darwin9.6" + +define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { +entry: + %tmp.i3 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1] + %tmp2.i = or <2 x i64> %tmp.i3, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1] + %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1] + %tmp.i2 = fadd <2 x double> %tmp3.i, %A ; <<2 x double>> [#uses=1] + %tmp.i = fadd <2 x double> %tmp.i2, %C ; <<2 x double>> [#uses=1] + ret <2 x double> %tmp.i +} diff --git a/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll b/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll new file mode 100644 index 0000000000..513c304e3b --- /dev/null +++ b/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk" + +define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { +entry: + %tmp25 = bitcast <2 x i64> %a1 to <8 x i16> ; <<8 x i16>> [#uses=1] + br label %bb +bb: ; preds = %bb, %entry + %skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3] + %vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3 ; <i32> [#uses=3] + %vDct_addr.0 = getelementptr <2 x i64>* %vDct, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1] + %vYp_addr.0 = getelementptr <2 x i64>* %vYp, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1] + %skiplist_addr.0 = getelementptr i8* %skiplist, i32 %skiplist_addr.0.rec ; <i8*> [#uses=1] + %vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1 ; <i32> [#uses=1] + %tmp7 = getelementptr <2 x i64>* %vDct, i32 %vDct_addr.0.sum43 ; <<2 x i64>*> [#uses=1] + %tmp8 = load <2 x i64>* %tmp7, align 16 ; <<2 x i64>> [#uses=1] + %tmp11 = load <2 x i64>* %vDct_addr.0, align 16 ; <<2 x i64>> [#uses=1] + %tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16> ; <<8 x i16>> [#uses=1] + %tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] + %tmp26 = mul <8 x i16> %tmp25, %tmp16 ; <<8 x i16>> [#uses=1] + %tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64> ; <<2 x i64>> [#uses=1] + store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16 + %tmp37 = load i8* %skiplist_addr.0, align 1 ; <i8> [#uses=1] + %tmp38 = icmp eq i8 %tmp37, 0 ; <i1> [#uses=1] + %indvar.next = add i32 %skiplist_addr.0.rec, 1 ; <i32> [#uses=1] + br i1 %tmp38, label %return, label %bb +return: ; preds = %bb + ret void +} diff --git a/test/CodeGen/X86/Stats/vec_insert-6.ll b/test/CodeGen/X86/Stats/vec_insert-6.ll new file mode 100644 index 0000000000..2a4864a48a --- /dev/null +++ b/test/CodeGen/X86/Stats/vec_insert-6.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq +; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6 + +define <4 x float> @t3(<4 x float>* %P) nounwind { + %tmp1 = load <4 x float>* %P + %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 4, i32 4, i32 4, i32 0 > + ret <4 x float> %tmp2 +} diff --git a/test/CodeGen/X86/Stats/vec_shuffle-19.ll b/test/CodeGen/X86/Stats/vec_shuffle-19.ll new file mode 100644 index 0000000000..b26f920e5e --- /dev/null +++ b/test/CodeGen/X86/Stats/vec_shuffle-19.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4 +; PR2485 + +define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind { +entry: + %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> < i32 4, i32 0, i32 0, i32 0 > ; <<4 x i32>> [#uses=1] + ret <4 x i32> %shuffle +} diff --git a/test/CodeGen/X86/Stats/vec_shuffle-20.ll b/test/CodeGen/X86/Stats/vec_shuffle-20.ll new file mode 100644 index 0000000000..b6b8ba6f84 --- /dev/null +++ b/test/CodeGen/X86/Stats/vec_shuffle-20.ll @@ -0,0 +1,7 @@ +; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2 + +define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { +entry: + shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:0 [#uses=1] + ret <4 x float> %0 +} diff --git a/test/CodeGen/X86/Stats/zero-remat.ll b/test/CodeGen/X86/Stats/zero-remat.ll new file mode 100644 index 0000000000..4242530f77 --- /dev/null +++ b/test/CodeGen/X86/Stats/zero-remat.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12 +; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 + +declare void @bar(double %x) +declare void @barf(float %x) + +define double @foo() nounwind { + + call void @bar(double 0.0) + ret double 0.0 + +;CHECK-32: foo: +;CHECK-32: call +;CHECK-32: fldz +;CHECK-32: ret + +;CHECK-64: foo: +;CHECK-64: xorps +;CHECK-64: call +;CHECK-64: xorps +;CHECK-64: ret +} + + +define float @foof() nounwind { + call void @barf(float 0.0) + ret float 0.0 + +;CHECK-32: foof: +;CHECK-32: call +;CHECK-32: fldz +;CHECK-32: ret + +;CHECK-64: foof: +;CHECK-64: xorps +;CHECK-64: call +;CHECK-64: xorps +;CHECK-64: ret +} |