aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/X86/Stats
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/X86/Stats')
-rw-r--r--test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll18
-rw-r--r--test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll12
-rw-r--r--test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll76
-rw-r--r--test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll23
-rw-r--r--test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll24
-rw-r--r--test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll51
-rw-r--r--test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll219
-rw-r--r--test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll51
-rw-r--r--test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll14
-rw-r--r--test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll56
-rw-r--r--test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll242
-rw-r--r--test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll141
-rw-r--r--test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll57
-rw-r--r--test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll52
-rw-r--r--test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll59
-rw-r--r--test/CodeGen/X86/Stats/MachineSink-PHIUse.ll39
-rw-r--r--test/CodeGen/X86/Stats/constant-pool-remat-0.ll22
-rw-r--r--test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll26
-rw-r--r--test/CodeGen/X86/Stats/dagcombine-cse.ll27
-rw-r--r--test/CodeGen/X86/Stats/hoist-invariant-load.ll29
-rw-r--r--test/CodeGen/X86/Stats/licm-nested.ll89
-rw-r--r--test/CodeGen/X86/Stats/lit.local.cfg8
-rw-r--r--test/CodeGen/X86/Stats/phi-immediate-factoring.ll54
-rw-r--r--test/CodeGen/X86/Stats/pr3522.ll34
-rw-r--r--test/CodeGen/X86/Stats/regpressure.ll114
-rw-r--r--test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll15
-rw-r--r--test/CodeGen/X86/Stats/twoaddr-pass-sink.ll29
-rw-r--r--test/CodeGen/X86/Stats/vec_insert-6.ll8
-rw-r--r--test/CodeGen/X86/Stats/vec_shuffle-19.ll8
-rw-r--r--test/CodeGen/X86/Stats/vec_shuffle-20.ll7
-rw-r--r--test/CodeGen/X86/Stats/zero-remat.ll40
31 files changed, 1644 insertions, 0 deletions
diff --git a/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll b/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll
new file mode 100644
index 0000000000..0af2445d7f
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2003-08-03-CallArgLiveRanges.ll
@@ -0,0 +1,18 @@
+; The old instruction selector used to load all arguments to a call up in
+; registers, then start pushing them all onto the stack. This is bad news as
+; it makes a ton of annoying overlapping live ranges. This code should not
+; cause spills!
+;
+; RUN: llc < %s -march=x86 -stats 2>&1 | not grep spilled
+
+target datalayout = "e-p:32:32"
+
+define i32 @test(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) {
+ ret i32 0
+}
+
+define i32 @main() {
+ %X = call i32 @test( i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10 ) ; <i32> [#uses=1]
+ ret i32 %X
+}
+
diff --git a/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll b/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll
new file mode 100644
index 0000000000..1a3d74918d
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2006-03-02-InstrSchedBug.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
+; RUN: grep asm-printer | grep 7
+
+define i32 @g(i32 %a, i32 %b) nounwind {
+ %tmp.1 = shl i32 %b, 1 ; <i32> [#uses=1]
+ %tmp.3 = add i32 %tmp.1, %a ; <i32> [#uses=1]
+ %tmp.5 = mul i32 %tmp.3, %a ; <i32> [#uses=1]
+ %tmp.8 = mul i32 %b, %b ; <i32> [#uses=1]
+ %tmp.9 = add i32 %tmp.5, %tmp.8 ; <i32> [#uses=1]
+ ret i32 %tmp.9
+}
+
diff --git a/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll b/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll
new file mode 100644
index 0000000000..5cba3efeef
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2006-05-01-SchedCausingSpills.ll
@@ -0,0 +1,76 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | \
+; RUN: not grep "Number of register spills"
+; END.
+
+
+define i32 @foo(<4 x float>* %a, <4 x float>* %b, <4 x float>* %c, <4 x float>* %d) {
+ %tmp44 = load <4 x float>* %a ; <<4 x float>> [#uses=9]
+ %tmp46 = load <4 x float>* %b ; <<4 x float>> [#uses=1]
+ %tmp48 = load <4 x float>* %c ; <<4 x float>> [#uses=1]
+ %tmp50 = load <4 x float>* %d ; <<4 x float>> [#uses=1]
+ %tmp51 = bitcast <4 x float> %tmp44 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp = shufflevector <4 x i32> %tmp51, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2]
+ %tmp52 = bitcast <4 x i32> %tmp to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp60 = xor <4 x i32> %tmp, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
+ %tmp61 = bitcast <4 x i32> %tmp60 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp74 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp52, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp75 = bitcast <4 x float> %tmp74 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp88 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp61, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp89 = bitcast <4 x float> %tmp88 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp98 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp75, <4 x i32> %tmp89 ) ; <<8 x i16>> [#uses=1]
+ %tmp102 = bitcast <8 x i16> %tmp98 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp.upgrd.1 = shufflevector <8 x i16> %tmp102, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp105 = shufflevector <8 x i16> %tmp.upgrd.1, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp105.upgrd.2 = bitcast <8 x i16> %tmp105 to <4 x float> ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp105.upgrd.2, <4 x float>* %a
+ %tmp108 = bitcast <4 x float> %tmp46 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp109 = shufflevector <4 x i32> %tmp108, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2]
+ %tmp109.upgrd.3 = bitcast <4 x i32> %tmp109 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp119 = xor <4 x i32> %tmp109, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
+ %tmp120 = bitcast <4 x i32> %tmp119 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp133 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp109.upgrd.3, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp134 = bitcast <4 x float> %tmp133 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp147 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp120, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp148 = bitcast <4 x float> %tmp147 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp159 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp134, <4 x i32> %tmp148 ) ; <<8 x i16>> [#uses=1]
+ %tmp163 = bitcast <8 x i16> %tmp159 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp164 = shufflevector <8 x i16> %tmp163, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp166 = shufflevector <8 x i16> %tmp164, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp166.upgrd.4 = bitcast <8 x i16> %tmp166 to <4 x float> ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp166.upgrd.4, <4 x float>* %b
+ %tmp169 = bitcast <4 x float> %tmp48 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp170 = shufflevector <4 x i32> %tmp169, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2]
+ %tmp170.upgrd.5 = bitcast <4 x i32> %tmp170 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp180 = xor <4 x i32> %tmp170, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
+ %tmp181 = bitcast <4 x i32> %tmp180 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp194 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp170.upgrd.5, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp195 = bitcast <4 x float> %tmp194 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp208 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp181, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp209 = bitcast <4 x float> %tmp208 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp220 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp195, <4 x i32> %tmp209 ) ; <<8 x i16>> [#uses=1]
+ %tmp224 = bitcast <8 x i16> %tmp220 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp225 = shufflevector <8 x i16> %tmp224, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp227 = shufflevector <8 x i16> %tmp225, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp227.upgrd.6 = bitcast <8 x i16> %tmp227 to <4 x float> ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp227.upgrd.6, <4 x float>* %c
+ %tmp230 = bitcast <4 x float> %tmp50 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp231 = shufflevector <4 x i32> %tmp230, <4 x i32> undef, <4 x i32> < i32 3, i32 3, i32 3, i32 3 > ; <<4 x i32>> [#uses=2]
+ %tmp231.upgrd.7 = bitcast <4 x i32> %tmp231 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp241 = xor <4 x i32> %tmp231, < i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648 > ; <<4 x i32>> [#uses=1]
+ %tmp242 = bitcast <4 x i32> %tmp241 to <4 x float> ; <<4 x float>> [#uses=1]
+ %tmp255 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp231.upgrd.7, <4 x float> %tmp44, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp256 = bitcast <4 x float> %tmp255 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp269 = tail call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %tmp44, <4 x float> %tmp242, i8 1 ) ; <<4 x float>> [#uses=1]
+ %tmp270 = bitcast <4 x float> %tmp269 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp281 = tail call <8 x i16> @llvm.x86.sse2.packssdw.128( <4 x i32> %tmp256, <4 x i32> %tmp270 ) ; <<8 x i16>> [#uses=1]
+ %tmp285 = bitcast <8 x i16> %tmp281 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp286 = shufflevector <8 x i16> %tmp285, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 4, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp288 = shufflevector <8 x i16> %tmp286, <8 x i16> undef, <8 x i32> < i32 2, i32 1, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1]
+ %tmp288.upgrd.8 = bitcast <8 x i16> %tmp288 to <4 x float> ; <<4 x float>> [#uses=1]
+ store <4 x float> %tmp288.upgrd.8, <4 x float>* %d
+ ret i32 0
+}
+
+declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8)
+
+declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll b/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll
new file mode 100644
index 0000000000..1c75f93915
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2006-05-02-InstrSched1.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
+; RUN: grep asm-printer | grep 14
+;
+@size20 = external global i32 ; <i32*> [#uses=1]
+@in5 = external global i8* ; <i8**> [#uses=1]
+
+define i32 @compare(i8* %a, i8* %b) nounwind {
+ %tmp = bitcast i8* %a to i32* ; <i32*> [#uses=1]
+ %tmp1 = bitcast i8* %b to i32* ; <i32*> [#uses=1]
+ %tmp.upgrd.1 = load i32* @size20 ; <i32> [#uses=1]
+ %tmp.upgrd.2 = load i8** @in5 ; <i8*> [#uses=2]
+ %tmp3 = load i32* %tmp1 ; <i32> [#uses=1]
+ %gep.upgrd.3 = zext i32 %tmp3 to i64 ; <i64> [#uses=1]
+ %tmp4 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.3 ; <i8*> [#uses=1]
+ %tmp7 = load i32* %tmp ; <i32> [#uses=1]
+ %gep.upgrd.4 = zext i32 %tmp7 to i64 ; <i64> [#uses=1]
+ %tmp8 = getelementptr i8* %tmp.upgrd.2, i64 %gep.upgrd.4 ; <i8*> [#uses=1]
+ %tmp.upgrd.5 = tail call i32 @memcmp( i8* %tmp8, i8* %tmp4, i32 %tmp.upgrd.1 ) ; <i32> [#uses=1]
+ ret i32 %tmp.upgrd.5
+}
+
+declare i32 @memcmp(i8*, i8*, i32)
+
diff --git a/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll b/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll
new file mode 100644
index 0000000000..95eefa1e71
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2006-05-02-InstrSched2.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
+; RUN: grep asm-printer | grep 13
+
+define void @_ZN9__gnu_cxx9hashtableISt4pairIKPKciES3_NS_4hashIS3_EESt10_Select1stIS5_E5eqstrSaIiEE14find_or_insertERKS5__cond_true456.i(i8* %tmp435.i, i32* %tmp449.i.out) nounwind {
+newFuncRoot:
+ br label %cond_true456.i
+bb459.i.exitStub: ; preds = %cond_true456.i
+ store i32 %tmp449.i, i32* %tmp449.i.out
+ ret void
+cond_true456.i: ; preds = %cond_true456.i, %newFuncRoot
+ %__s441.2.4.i = phi i8* [ %tmp451.i.upgrd.1, %cond_true456.i ], [ %tmp435.i, %newFuncRoot ] ; <i8*> [#uses=2]
+ %__h.2.4.i = phi i32 [ %tmp449.i, %cond_true456.i ], [ 0, %newFuncRoot ] ; <i32> [#uses=1]
+ %tmp446.i = mul i32 %__h.2.4.i, 5 ; <i32> [#uses=1]
+ %tmp.i = load i8* %__s441.2.4.i ; <i8> [#uses=1]
+ %tmp448.i = sext i8 %tmp.i to i32 ; <i32> [#uses=1]
+ %tmp449.i = add i32 %tmp448.i, %tmp446.i ; <i32> [#uses=2]
+ %tmp450.i = ptrtoint i8* %__s441.2.4.i to i32 ; <i32> [#uses=1]
+ %tmp451.i = add i32 %tmp450.i, 1 ; <i32> [#uses=1]
+ %tmp451.i.upgrd.1 = inttoptr i32 %tmp451.i to i8* ; <i8*> [#uses=2]
+ %tmp45435.i = load i8* %tmp451.i.upgrd.1 ; <i8> [#uses=1]
+ %tmp45536.i = icmp eq i8 %tmp45435.i, 0 ; <i1> [#uses=1]
+ br i1 %tmp45536.i, label %bb459.i.exitStub, label %cond_true456.i
+}
+
diff --git a/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll b/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll
new file mode 100644
index 0000000000..37c510786a
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2006-05-11-InstrSched.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mcpu=penryn -mattr=+sse2 -stats -realign-stack=0 2>&1 | \
+; RUN: grep "asm-printer" | grep 35
+
+target datalayout = "e-p:32:32"
+define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
+entry:
+ %tmp9 = icmp slt i32 %M, 5 ; <i1> [#uses=1]
+ br i1 %tmp9, label %return, label %cond_true
+
+cond_true: ; preds = %cond_true, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1]
+ %tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=3]
+ %tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4]
+ %tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <<16 x i8>*> [#uses=1]
+ %tmp = load <16 x i8>* %tmp34, align 1 ; <<16 x i8>> [#uses=1]
+ %tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
+ %tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1]
+ %tmp54 = bitcast <16 x i8> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2]
+ %tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <<16 x i8>*> [#uses=1]
+ %tmp66 = load <16 x i8>* %tmp65, align 1 ; <<16 x i8>> [#uses=1]
+ %tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1]
+ %tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
+ %tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1]
+ %tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp88 = add <4 x i32> %tmp87, %tmp77 ; <<4 x i32>> [#uses=2]
+ %tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %tmp99 = tail call <4 x i32> @llvm.x86.sse2.psra.d( <4 x i32> %tmp88, <4 x i32> %tmp55 ) ; <<4 x i32>> [#uses=1]
+ %tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64> ; <<2 x i64>> [#uses=2]
+ %tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 > ; <<2 x i64>> [#uses=1]
+ %tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1]
+ %tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1]
+ %tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1]
+ %tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1]
+ %tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
+ store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
+ %tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
+ %tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp.upgrd.8, label %cond_true, label %return
+
+return: ; preds = %cond_true, %entry
+ ret void
+}
+
+declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>)
diff --git a/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll b/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll
new file mode 100644
index 0000000000..a1b973d7cc
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2008-02-18-TailMergingBug.ll
@@ -0,0 +1,219 @@
+; RUN: llc < %s -march=x86 -mcpu=yonah -stats 2>&1 | grep "Number of block tails merged" | grep 16
+; PR1909
+
+@.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00" ; <[48 x i8]*> [#uses=1]
+
+define void @minmax(float* %result) nounwind optsize {
+entry:
+ %tmp2 = load float* %result, align 4 ; <float> [#uses=6]
+ %tmp4 = getelementptr float* %result, i32 2 ; <float*> [#uses=5]
+ %tmp5 = load float* %tmp4, align 4 ; <float> [#uses=10]
+ %tmp7 = getelementptr float* %result, i32 4 ; <float*> [#uses=5]
+ %tmp8 = load float* %tmp7, align 4 ; <float> [#uses=8]
+ %tmp10 = getelementptr float* %result, i32 6 ; <float*> [#uses=3]
+ %tmp11 = load float* %tmp10, align 4 ; <float> [#uses=8]
+ %tmp12 = fcmp olt float %tmp8, %tmp11 ; <i1> [#uses=5]
+ br i1 %tmp12, label %bb, label %bb21
+
+bb: ; preds = %entry
+ %tmp23469 = fcmp olt float %tmp5, %tmp8 ; <i1> [#uses=1]
+ br i1 %tmp23469, label %bb26, label %bb30
+
+bb21: ; preds = %entry
+ %tmp23 = fcmp olt float %tmp5, %tmp11 ; <i1> [#uses=1]
+ br i1 %tmp23, label %bb26, label %bb30
+
+bb26: ; preds = %bb21, %bb
+ %tmp52471 = fcmp olt float %tmp2, %tmp5 ; <i1> [#uses=1]
+ br i1 %tmp52471, label %bb111, label %bb59
+
+bb30: ; preds = %bb21, %bb
+ br i1 %tmp12, label %bb40, label %bb50
+
+bb40: ; preds = %bb30
+ %tmp52473 = fcmp olt float %tmp2, %tmp8 ; <i1> [#uses=1]
+ br i1 %tmp52473, label %bb111, label %bb59
+
+bb50: ; preds = %bb30
+ %tmp52 = fcmp olt float %tmp2, %tmp11 ; <i1> [#uses=1]
+ br i1 %tmp52, label %bb111, label %bb59
+
+bb59: ; preds = %bb50, %bb40, %bb26
+ br i1 %tmp12, label %bb72, label %bb80
+
+bb72: ; preds = %bb59
+ %tmp82475 = fcmp olt float %tmp5, %tmp8 ; <i1> [#uses=2]
+ %brmerge786 = or i1 %tmp82475, %tmp12 ; <i1> [#uses=1]
+ %tmp4.mux787 = select i1 %tmp82475, float* %tmp4, float* %tmp7 ; <float*> [#uses=1]
+ br i1 %brmerge786, label %bb111, label %bb103
+
+bb80: ; preds = %bb59
+ %tmp82 = fcmp olt float %tmp5, %tmp11 ; <i1> [#uses=2]
+ %brmerge = or i1 %tmp82, %tmp12 ; <i1> [#uses=1]
+ %tmp4.mux = select i1 %tmp82, float* %tmp4, float* %tmp7 ; <float*> [#uses=1]
+ br i1 %brmerge, label %bb111, label %bb103
+
+bb103: ; preds = %bb80, %bb72
+ br label %bb111
+
+bb111: ; preds = %bb103, %bb80, %bb72, %bb50, %bb40, %bb26
+ %iftmp.0.0.in = phi float* [ %tmp10, %bb103 ], [ %result, %bb26 ], [ %result, %bb40 ], [ %result, %bb50 ], [ %tmp4.mux, %bb80 ], [ %tmp4.mux787, %bb72 ] ; <float*> [#uses=1]
+ %iftmp.0.0 = load float* %iftmp.0.0.in ; <float> [#uses=1]
+ %tmp125 = fcmp ogt float %tmp8, %tmp11 ; <i1> [#uses=5]
+ br i1 %tmp125, label %bb128, label %bb136
+
+bb128: ; preds = %bb111
+ %tmp138477 = fcmp ogt float %tmp5, %tmp8 ; <i1> [#uses=1]
+ br i1 %tmp138477, label %bb141, label %bb145
+
+bb136: ; preds = %bb111
+ %tmp138 = fcmp ogt float %tmp5, %tmp11 ; <i1> [#uses=1]
+ br i1 %tmp138, label %bb141, label %bb145
+
+bb141: ; preds = %bb136, %bb128
+ %tmp167479 = fcmp ogt float %tmp2, %tmp5 ; <i1> [#uses=1]
+ br i1 %tmp167479, label %bb226, label %bb174
+
+bb145: ; preds = %bb136, %bb128
+ br i1 %tmp125, label %bb155, label %bb165
+
+bb155: ; preds = %bb145
+ %tmp167481 = fcmp ogt float %tmp2, %tmp8 ; <i1> [#uses=1]
+ br i1 %tmp167481, label %bb226, label %bb174
+
+bb165: ; preds = %bb145
+ %tmp167 = fcmp ogt float %tmp2, %tmp11 ; <i1> [#uses=1]
+ br i1 %tmp167, label %bb226, label %bb174
+
+bb174: ; preds = %bb165, %bb155, %bb141
+ br i1 %tmp125, label %bb187, label %bb195
+
+bb187: ; preds = %bb174
+ %tmp197483 = fcmp ogt float %tmp5, %tmp8 ; <i1> [#uses=2]
+ %brmerge790 = or i1 %tmp197483, %tmp125 ; <i1> [#uses=1]
+ %tmp4.mux791 = select i1 %tmp197483, float* %tmp4, float* %tmp7 ; <float*> [#uses=1]
+ br i1 %brmerge790, label %bb226, label %bb218
+
+bb195: ; preds = %bb174
+ %tmp197 = fcmp ogt float %tmp5, %tmp11 ; <i1> [#uses=2]
+ %brmerge788 = or i1 %tmp197, %tmp125 ; <i1> [#uses=1]
+ %tmp4.mux789 = select i1 %tmp197, float* %tmp4, float* %tmp7 ; <float*> [#uses=1]
+ br i1 %brmerge788, label %bb226, label %bb218
+
+bb218: ; preds = %bb195, %bb187
+ br label %bb226
+
+bb226: ; preds = %bb218, %bb195, %bb187, %bb165, %bb155, %bb141
+ %iftmp.7.0.in = phi float* [ %tmp10, %bb218 ], [ %result, %bb141 ], [ %result, %bb155 ], [ %result, %bb165 ], [ %tmp4.mux789, %bb195 ], [ %tmp4.mux791, %bb187 ] ; <float*> [#uses=1]
+ %iftmp.7.0 = load float* %iftmp.7.0.in ; <float> [#uses=1]
+ %tmp229 = getelementptr float* %result, i32 1 ; <float*> [#uses=7]
+ %tmp230 = load float* %tmp229, align 4 ; <float> [#uses=6]
+ %tmp232 = getelementptr float* %result, i32 3 ; <float*> [#uses=5]
+ %tmp233 = load float* %tmp232, align 4 ; <float> [#uses=10]
+ %tmp235 = getelementptr float* %result, i32 5 ; <float*> [#uses=5]
+ %tmp236 = load float* %tmp235, align 4 ; <float> [#uses=8]
+ %tmp238 = getelementptr float* %result, i32 7 ; <float*> [#uses=3]
+ %tmp239 = load float* %tmp238, align 4 ; <float> [#uses=8]
+ %tmp240 = fcmp olt float %tmp236, %tmp239 ; <i1> [#uses=5]
+ br i1 %tmp240, label %bb243, label %bb251
+
+bb243: ; preds = %bb226
+ %tmp253485 = fcmp olt float %tmp233, %tmp236 ; <i1> [#uses=1]
+ br i1 %tmp253485, label %bb256, label %bb260
+
+bb251: ; preds = %bb226
+ %tmp253 = fcmp olt float %tmp233, %tmp239 ; <i1> [#uses=1]
+ br i1 %tmp253, label %bb256, label %bb260
+
+bb256: ; preds = %bb251, %bb243
+ %tmp282487 = fcmp olt float %tmp230, %tmp233 ; <i1> [#uses=1]
+ br i1 %tmp282487, label %bb341, label %bb289
+
+bb260: ; preds = %bb251, %bb243
+ br i1 %tmp240, label %bb270, label %bb280
+
+bb270: ; preds = %bb260
+ %tmp282489 = fcmp olt float %tmp230, %tmp236 ; <i1> [#uses=1]
+ br i1 %tmp282489, label %bb341, label %bb289
+
+bb280: ; preds = %bb260
+ %tmp282 = fcmp olt float %tmp230, %tmp239 ; <i1> [#uses=1]
+ br i1 %tmp282, label %bb341, label %bb289
+
+bb289: ; preds = %bb280, %bb270, %bb256
+ br i1 %tmp240, label %bb302, label %bb310
+
+bb302: ; preds = %bb289
+ %tmp312491 = fcmp olt float %tmp233, %tmp236 ; <i1> [#uses=2]
+ %brmerge793 = or i1 %tmp312491, %tmp240 ; <i1> [#uses=1]
+ %tmp232.mux794 = select i1 %tmp312491, float* %tmp232, float* %tmp235 ; <float*> [#uses=1]
+ br i1 %brmerge793, label %bb341, label %bb333
+
+bb310: ; preds = %bb289
+ %tmp312 = fcmp olt float %tmp233, %tmp239 ; <i1> [#uses=2]
+ %brmerge792 = or i1 %tmp312, %tmp240 ; <i1> [#uses=1]
+ %tmp232.mux = select i1 %tmp312, float* %tmp232, float* %tmp235 ; <float*> [#uses=1]
+ br i1 %brmerge792, label %bb341, label %bb333
+
+bb333: ; preds = %bb310, %bb302
+ br label %bb341
+
+bb341: ; preds = %bb333, %bb310, %bb302, %bb280, %bb270, %bb256
+ %iftmp.14.0.in = phi float* [ %tmp238, %bb333 ], [ %tmp229, %bb280 ], [ %tmp229, %bb270 ], [ %tmp229, %bb256 ], [ %tmp232.mux, %bb310 ], [ %tmp232.mux794, %bb302 ] ; <float*> [#uses=1]
+ %iftmp.14.0 = load float* %iftmp.14.0.in ; <float> [#uses=1]
+ %tmp355 = fcmp ogt float %tmp236, %tmp239 ; <i1> [#uses=5]
+ br i1 %tmp355, label %bb358, label %bb366
+
+bb358: ; preds = %bb341
+ %tmp368493 = fcmp ogt float %tmp233, %tmp236 ; <i1> [#uses=1]
+ br i1 %tmp368493, label %bb371, label %bb375
+
+bb366: ; preds = %bb341
+ %tmp368 = fcmp ogt float %tmp233, %tmp239 ; <i1> [#uses=1]
+ br i1 %tmp368, label %bb371, label %bb375
+
+bb371: ; preds = %bb366, %bb358
+ %tmp397495 = fcmp ogt float %tmp230, %tmp233 ; <i1> [#uses=1]
+ br i1 %tmp397495, label %bb456, label %bb404
+
+bb375: ; preds = %bb366, %bb358
+ br i1 %tmp355, label %bb385, label %bb395
+
+bb385: ; preds = %bb375
+ %tmp397497 = fcmp ogt float %tmp230, %tmp236 ; <i1> [#uses=1]
+ br i1 %tmp397497, label %bb456, label %bb404
+
+bb395: ; preds = %bb375
+ %tmp397 = fcmp ogt float %tmp230, %tmp239 ; <i1> [#uses=1]
+ br i1 %tmp397, label %bb456, label %bb404
+
+bb404: ; preds = %bb395, %bb385, %bb371
+ br i1 %tmp355, label %bb417, label %bb425
+
+bb417: ; preds = %bb404
+ %tmp427499 = fcmp ogt float %tmp233, %tmp236 ; <i1> [#uses=2]
+ %brmerge797 = or i1 %tmp427499, %tmp355 ; <i1> [#uses=1]
+ %tmp232.mux798 = select i1 %tmp427499, float* %tmp232, float* %tmp235 ; <float*> [#uses=1]
+ br i1 %brmerge797, label %bb456, label %bb448
+
+bb425: ; preds = %bb404
+ %tmp427 = fcmp ogt float %tmp233, %tmp239 ; <i1> [#uses=2]
+ %brmerge795 = or i1 %tmp427, %tmp355 ; <i1> [#uses=1]
+ %tmp232.mux796 = select i1 %tmp427, float* %tmp232, float* %tmp235 ; <float*> [#uses=1]
+ br i1 %brmerge795, label %bb456, label %bb448
+
+bb448: ; preds = %bb425, %bb417
+ br label %bb456
+
+bb456: ; preds = %bb448, %bb425, %bb417, %bb395, %bb385, %bb371
+ %iftmp.21.0.in = phi float* [ %tmp238, %bb448 ], [ %tmp229, %bb395 ], [ %tmp229, %bb385 ], [ %tmp229, %bb371 ], [ %tmp232.mux796, %bb425 ], [ %tmp232.mux798, %bb417 ] ; <float*> [#uses=1]
+ %iftmp.21.0 = load float* %iftmp.21.0.in ; <float> [#uses=1]
+ %tmp458459 = fpext float %iftmp.21.0 to double ; <double> [#uses=1]
+ %tmp460461 = fpext float %iftmp.7.0 to double ; <double> [#uses=1]
+ %tmp462463 = fpext float %iftmp.14.0 to double ; <double> [#uses=1]
+ %tmp464465 = fpext float %iftmp.0.0 to double ; <double> [#uses=1]
+ %tmp467 = tail call i32 (i8*, ...)* @printf( i8* getelementptr ([48 x i8]* @.str, i32 0, i32 0), double %tmp464465, double %tmp462463, double %tmp460461, double %tmp458459 ) nounwind ; <i32> [#uses=0]
+ ret void
+}
+
+declare i32 @printf(i8*, ...) nounwind
diff --git a/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll
new file mode 100644
index 0000000000..b2cf34cd20
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2008-10-27-CoalescerBug.ll
@@ -0,0 +1,51 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -stats 2>&1 | FileCheck %s
+; Now this test spills one register. But a reload in the loop is cheaper than
+; the divsd so it's a win.
+
+define fastcc void @fourn(double* %data, i32 %isign) nounwind {
+; CHECK: fourn
+entry:
+ br label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar93 = phi i32 [ 0, %entry ], [ %idim.030, %bb ] ; <i32> [#uses=2]
+ %idim.030 = add i32 %indvar93, 1 ; <i32> [#uses=1]
+ %0 = add i32 %indvar93, 2 ; <i32> [#uses=1]
+ %1 = icmp sgt i32 %0, 2 ; <i1> [#uses=1]
+ br i1 %1, label %bb30.loopexit, label %bb
+
+; CHECK: %bb30.loopexit
+; CHECK: divsd %xmm0
+; CHECK: movsd %xmm0, 16(%esp)
+; CHECK: %bb3
+bb3: ; preds = %bb30.loopexit, %bb25, %bb3
+ %2 = load i32* null, align 4 ; <i32> [#uses=1]
+ %3 = mul i32 %2, 0 ; <i32> [#uses=1]
+ %4 = icmp slt i32 0, %3 ; <i1> [#uses=1]
+ br i1 %4, label %bb18, label %bb3
+
+bb18: ; preds = %bb3
+ %5 = fdiv double %11, 0.000000e+00 ; <double> [#uses=1]
+ %6 = tail call double @sin(double %5) nounwind readonly ; <double> [#uses=1]
+ br label %bb24.preheader
+
+bb22.preheader: ; preds = %bb24.preheader, %bb22.preheader
+ br label %bb22.preheader
+
+bb25: ; preds = %bb24.preheader
+ %7 = fmul double 0.000000e+00, %6 ; <double> [#uses=0]
+ %8 = add i32 %i3.122100, 0 ; <i32> [#uses=1]
+ %9 = icmp sgt i32 %8, 0 ; <i1> [#uses=1]
+ br i1 %9, label %bb3, label %bb24.preheader
+
+bb24.preheader: ; preds = %bb25, %bb18
+ %i3.122100 = or i32 0, 1 ; <i32> [#uses=2]
+ %10 = icmp slt i32 0, %i3.122100 ; <i1> [#uses=1]
+ br i1 %10, label %bb25, label %bb22.preheader
+
+bb30.loopexit: ; preds = %bb
+ %11 = fmul double 0.000000e+00, 0x401921FB54442D1C ; <double> [#uses=1]
+ br label %bb3
+}
+
+declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll b/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll
new file mode 100644
index 0000000000..9cbf350940
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2009-02-25-CommuteBug.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | not grep commuted
+; rdar://6608609
+
+define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone {
+entry:
+ %tmp.i2 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %tmp2.i = or <2 x i64> %tmp.i2, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1]
+ %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1]
+ %0 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %A, <2 x double> %tmp3.i) nounwind readnone ; <<2 x double>> [#uses=1]
+ %tmp.i = fadd <2 x double> %0, %C ; <<2 x double>> [#uses=1]
+ ret <2 x double> %tmp.i
+}
+
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
diff --git a/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll
new file mode 100644
index 0000000000..d50fe6f73a
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2009-02-26-MachineLICMBug.ll
@@ -0,0 +1,56 @@
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm"
+; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s
+; rdar://6627786
+; rdar://7792037
+
+target triple = "x86_64-apple-darwin10.0"
+ %struct.Key = type { i64 }
+ %struct.__Rec = type opaque
+ %struct.__vv = type { }
+
+define %struct.__vv* @t(%struct.Key* %desc, i64 %p) nounwind ssp {
+entry:
+ br label %bb4
+
+bb4: ; preds = %bb.i, %bb26, %bb4, %entry
+; CHECK: %bb4
+; CHECK: xorb
+; CHECK: callq
+; CHECK: movq
+; CHECK: xorl
+; CHECK: xorb
+
+ %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
+ %ins = or i64 %p, 2097152 ; <i64> [#uses=1]
+ %1 = call i32 (...)* @xxCalculateMidType(%struct.Key* %desc, i32 0) nounwind ; <i32> [#uses=1]
+ %cond = icmp eq i32 %1, 1 ; <i1> [#uses=1]
+ br i1 %cond, label %bb26, label %bb4
+
+bb26: ; preds = %bb4
+ %2 = and i64 %ins, 15728640 ; <i64> [#uses=1]
+ %cond.i = icmp eq i64 %2, 1048576 ; <i1> [#uses=1]
+ br i1 %cond.i, label %bb.i, label %bb4
+
+bb.i: ; preds = %bb26
+ %3 = load i32* null, align 4 ; <i32> [#uses=1]
+ %4 = uitofp i32 %3 to float ; <float> [#uses=1]
+ %.sum13.i = add i64 0, 4 ; <i64> [#uses=1]
+ %5 = getelementptr i8* null, i64 %.sum13.i ; <i8*> [#uses=1]
+ %6 = bitcast i8* %5 to i32* ; <i32*> [#uses=1]
+ %7 = load i32* %6, align 4 ; <i32> [#uses=1]
+ %8 = uitofp i32 %7 to float ; <float> [#uses=1]
+ %.sum.i = add i64 0, 8 ; <i64> [#uses=1]
+ %9 = getelementptr i8* null, i64 %.sum.i ; <i8*> [#uses=1]
+ %10 = bitcast i8* %9 to i32* ; <i32*> [#uses=1]
+ %11 = load i32* %10, align 4 ; <i32> [#uses=1]
+ %12 = uitofp i32 %11 to float ; <float> [#uses=1]
+ %13 = insertelement <4 x float> undef, float %4, i32 0 ; <<4 x float>> [#uses=1]
+ %14 = insertelement <4 x float> %13, float %8, i32 1 ; <<4 x float>> [#uses=1]
+ %15 = insertelement <4 x float> %14, float %12, i32 2 ; <<4 x float>> [#uses=1]
+ store <4 x float> %15, <4 x float>* null, align 16
+ br label %bb4
+}
+
+declare i32 @xxGetOffsetForCode(...)
+
+declare i32 @xxCalculateMidType(...)
diff --git a/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll b/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll
new file mode 100644
index 0000000000..d934ec9a88
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2009-03-23-MultiUseSched.ll
@@ -0,0 +1,242 @@
+; RUN: llc < %s -mtriple=x86_64-linux -relocation-model=static -o /dev/null -stats -info-output-file - > %t
+; RUN: not grep spill %t
+; RUN: not grep "%rsp" %t
+; RUN: not grep "%rbp" %t
+
+; The register-pressure scheduler should be able to schedule this in a
+; way that does not require spills.
+
+@X = external global i64 ; <i64*> [#uses=25]
+
+define fastcc i64 @foo() nounwind { ; straight-line body: 24 volatile loads of @X, then 16 bswap calls interleaved with add chains
+ %tmp = load volatile i64* @X ; <i64> [#uses=7]
+ %tmp1 = load volatile i64* @X ; <i64> [#uses=5]
+ %tmp2 = load volatile i64* @X ; <i64> [#uses=3]
+ %tmp3 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp4 = load volatile i64* @X ; <i64> [#uses=5]
+ %tmp5 = load volatile i64* @X ; <i64> [#uses=3]
+ %tmp6 = load volatile i64* @X ; <i64> [#uses=2]
+ %tmp7 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp8 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp9 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp10 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp11 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp12 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp13 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp14 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp15 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp16 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp17 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp18 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp19 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp20 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp21 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp22 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp23 = load volatile i64* @X ; <i64> [#uses=1]
+ %tmp24 = call i64 @llvm.bswap.i64(i64 %tmp8) ; <i64> [#uses=1] first of the 16 bswap calls (%tmp8 .. %tmp23, one each)
+ %tmp25 = add i64 %tmp6, %tmp5 ; <i64> [#uses=1]
+ %tmp26 = add i64 %tmp25, %tmp4 ; <i64> [#uses=1]
+ %tmp27 = add i64 %tmp7, %tmp4 ; <i64> [#uses=1]
+ %tmp28 = add i64 %tmp27, %tmp26 ; <i64> [#uses=1]
+ %tmp29 = add i64 %tmp28, %tmp24 ; <i64> [#uses=2]
+ %tmp30 = add i64 %tmp2, %tmp1 ; <i64> [#uses=1]
+ %tmp31 = add i64 %tmp30, %tmp ; <i64> [#uses=1]
+ %tmp32 = add i64 %tmp2, %tmp1 ; <i64> [#uses=1]
+ %tmp33 = add i64 %tmp31, %tmp32 ; <i64> [#uses=1]
+ %tmp34 = add i64 %tmp29, %tmp3 ; <i64> [#uses=5]
+ %tmp35 = add i64 %tmp33, %tmp ; <i64> [#uses=1]
+ %tmp36 = add i64 %tmp35, %tmp29 ; <i64> [#uses=7]
+ %tmp37 = call i64 @llvm.bswap.i64(i64 %tmp9) ; <i64> [#uses=1]
+ %tmp38 = add i64 %tmp4, %tmp5 ; <i64> [#uses=1]
+ %tmp39 = add i64 %tmp38, %tmp34 ; <i64> [#uses=1]
+ %tmp40 = add i64 %tmp6, %tmp37 ; <i64> [#uses=1]
+ %tmp41 = add i64 %tmp40, %tmp39 ; <i64> [#uses=1]
+ %tmp42 = add i64 %tmp41, %tmp34 ; <i64> [#uses=2]
+ %tmp43 = add i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp44 = add i64 %tmp36, %tmp43 ; <i64> [#uses=1]
+ %tmp45 = add i64 %tmp1, %tmp ; <i64> [#uses=1]
+ %tmp46 = add i64 %tmp44, %tmp45 ; <i64> [#uses=1]
+ %tmp47 = add i64 %tmp42, %tmp2 ; <i64> [#uses=5]
+ %tmp48 = add i64 %tmp36, %tmp46 ; <i64> [#uses=1]
+ %tmp49 = add i64 %tmp48, %tmp42 ; <i64> [#uses=7]
+ %tmp50 = call i64 @llvm.bswap.i64(i64 %tmp10) ; <i64> [#uses=1]
+ %tmp51 = add i64 %tmp34, %tmp4 ; <i64> [#uses=1]
+ %tmp52 = add i64 %tmp51, %tmp47 ; <i64> [#uses=1]
+ %tmp53 = add i64 %tmp5, %tmp50 ; <i64> [#uses=1]
+ %tmp54 = add i64 %tmp53, %tmp52 ; <i64> [#uses=1]
+ %tmp55 = add i64 %tmp54, %tmp47 ; <i64> [#uses=2]
+ %tmp56 = add i64 %tmp36, %tmp ; <i64> [#uses=1]
+ %tmp57 = add i64 %tmp49, %tmp56 ; <i64> [#uses=1]
+ %tmp58 = add i64 %tmp36, %tmp ; <i64> [#uses=1]
+ %tmp59 = add i64 %tmp57, %tmp58 ; <i64> [#uses=1]
+ %tmp60 = add i64 %tmp55, %tmp1 ; <i64> [#uses=5]
+ %tmp61 = add i64 %tmp49, %tmp59 ; <i64> [#uses=1]
+ %tmp62 = add i64 %tmp61, %tmp55 ; <i64> [#uses=7]
+ %tmp63 = call i64 @llvm.bswap.i64(i64 %tmp11) ; <i64> [#uses=1]
+ %tmp64 = add i64 %tmp47, %tmp34 ; <i64> [#uses=1]
+ %tmp65 = add i64 %tmp64, %tmp60 ; <i64> [#uses=1]
+ %tmp66 = add i64 %tmp4, %tmp63 ; <i64> [#uses=1]
+ %tmp67 = add i64 %tmp66, %tmp65 ; <i64> [#uses=1]
+ %tmp68 = add i64 %tmp67, %tmp60 ; <i64> [#uses=2]
+ %tmp69 = add i64 %tmp49, %tmp36 ; <i64> [#uses=1]
+ %tmp70 = add i64 %tmp62, %tmp69 ; <i64> [#uses=1]
+ %tmp71 = add i64 %tmp49, %tmp36 ; <i64> [#uses=1]
+ %tmp72 = add i64 %tmp70, %tmp71 ; <i64> [#uses=1]
+ %tmp73 = add i64 %tmp68, %tmp ; <i64> [#uses=5]
+ %tmp74 = add i64 %tmp62, %tmp72 ; <i64> [#uses=1]
+ %tmp75 = add i64 %tmp74, %tmp68 ; <i64> [#uses=7]
+ %tmp76 = call i64 @llvm.bswap.i64(i64 %tmp12) ; <i64> [#uses=1]
+ %tmp77 = add i64 %tmp60, %tmp47 ; <i64> [#uses=1]
+ %tmp78 = add i64 %tmp77, %tmp73 ; <i64> [#uses=1]
+ %tmp79 = add i64 %tmp34, %tmp76 ; <i64> [#uses=1]
+ %tmp80 = add i64 %tmp79, %tmp78 ; <i64> [#uses=1]
+ %tmp81 = add i64 %tmp80, %tmp73 ; <i64> [#uses=2]
+ %tmp82 = add i64 %tmp62, %tmp49 ; <i64> [#uses=1]
+ %tmp83 = add i64 %tmp75, %tmp82 ; <i64> [#uses=1]
+ %tmp84 = add i64 %tmp62, %tmp49 ; <i64> [#uses=1]
+ %tmp85 = add i64 %tmp83, %tmp84 ; <i64> [#uses=1]
+ %tmp86 = add i64 %tmp81, %tmp36 ; <i64> [#uses=5]
+ %tmp87 = add i64 %tmp75, %tmp85 ; <i64> [#uses=1]
+ %tmp88 = add i64 %tmp87, %tmp81 ; <i64> [#uses=7]
+ %tmp89 = call i64 @llvm.bswap.i64(i64 %tmp13) ; <i64> [#uses=1]
+ %tmp90 = add i64 %tmp73, %tmp60 ; <i64> [#uses=1]
+ %tmp91 = add i64 %tmp90, %tmp86 ; <i64> [#uses=1]
+ %tmp92 = add i64 %tmp47, %tmp89 ; <i64> [#uses=1]
+ %tmp93 = add i64 %tmp92, %tmp91 ; <i64> [#uses=1]
+ %tmp94 = add i64 %tmp93, %tmp86 ; <i64> [#uses=2]
+ %tmp95 = add i64 %tmp75, %tmp62 ; <i64> [#uses=1]
+ %tmp96 = add i64 %tmp88, %tmp95 ; <i64> [#uses=1]
+ %tmp97 = add i64 %tmp75, %tmp62 ; <i64> [#uses=1]
+ %tmp98 = add i64 %tmp96, %tmp97 ; <i64> [#uses=1]
+ %tmp99 = add i64 %tmp94, %tmp49 ; <i64> [#uses=5]
+ %tmp100 = add i64 %tmp88, %tmp98 ; <i64> [#uses=1]
+ %tmp101 = add i64 %tmp100, %tmp94 ; <i64> [#uses=7]
+ %tmp102 = call i64 @llvm.bswap.i64(i64 %tmp14) ; <i64> [#uses=1]
+ %tmp103 = add i64 %tmp86, %tmp73 ; <i64> [#uses=1]
+ %tmp104 = add i64 %tmp103, %tmp99 ; <i64> [#uses=1]
+ %tmp105 = add i64 %tmp102, %tmp60 ; <i64> [#uses=1]
+ %tmp106 = add i64 %tmp105, %tmp104 ; <i64> [#uses=1]
+ %tmp107 = add i64 %tmp106, %tmp99 ; <i64> [#uses=2]
+ %tmp108 = add i64 %tmp88, %tmp75 ; <i64> [#uses=1]
+ %tmp109 = add i64 %tmp101, %tmp108 ; <i64> [#uses=1]
+ %tmp110 = add i64 %tmp88, %tmp75 ; <i64> [#uses=1]
+ %tmp111 = add i64 %tmp109, %tmp110 ; <i64> [#uses=1]
+ %tmp112 = add i64 %tmp107, %tmp62 ; <i64> [#uses=5]
+ %tmp113 = add i64 %tmp101, %tmp111 ; <i64> [#uses=1]
+ %tmp114 = add i64 %tmp113, %tmp107 ; <i64> [#uses=7]
+ %tmp115 = call i64 @llvm.bswap.i64(i64 %tmp15) ; <i64> [#uses=1]
+ %tmp116 = add i64 %tmp99, %tmp86 ; <i64> [#uses=1]
+ %tmp117 = add i64 %tmp116, %tmp112 ; <i64> [#uses=1]
+ %tmp118 = add i64 %tmp115, %tmp73 ; <i64> [#uses=1]
+ %tmp119 = add i64 %tmp118, %tmp117 ; <i64> [#uses=1]
+ %tmp120 = add i64 %tmp119, %tmp112 ; <i64> [#uses=2]
+ %tmp121 = add i64 %tmp101, %tmp88 ; <i64> [#uses=1]
+ %tmp122 = add i64 %tmp114, %tmp121 ; <i64> [#uses=1]
+ %tmp123 = add i64 %tmp101, %tmp88 ; <i64> [#uses=1]
+ %tmp124 = add i64 %tmp122, %tmp123 ; <i64> [#uses=1]
+ %tmp125 = add i64 %tmp120, %tmp75 ; <i64> [#uses=5]
+ %tmp126 = add i64 %tmp114, %tmp124 ; <i64> [#uses=1]
+ %tmp127 = add i64 %tmp126, %tmp120 ; <i64> [#uses=7]
+ %tmp128 = call i64 @llvm.bswap.i64(i64 %tmp16) ; <i64> [#uses=1]
+ %tmp129 = add i64 %tmp112, %tmp99 ; <i64> [#uses=1]
+ %tmp130 = add i64 %tmp129, %tmp125 ; <i64> [#uses=1]
+ %tmp131 = add i64 %tmp128, %tmp86 ; <i64> [#uses=1]
+ %tmp132 = add i64 %tmp131, %tmp130 ; <i64> [#uses=1]
+ %tmp133 = add i64 %tmp132, %tmp125 ; <i64> [#uses=2]
+ %tmp134 = add i64 %tmp114, %tmp101 ; <i64> [#uses=1]
+ %tmp135 = add i64 %tmp127, %tmp134 ; <i64> [#uses=1]
+ %tmp136 = add i64 %tmp114, %tmp101 ; <i64> [#uses=1]
+ %tmp137 = add i64 %tmp135, %tmp136 ; <i64> [#uses=1]
+ %tmp138 = add i64 %tmp133, %tmp88 ; <i64> [#uses=5]
+ %tmp139 = add i64 %tmp127, %tmp137 ; <i64> [#uses=1]
+ %tmp140 = add i64 %tmp139, %tmp133 ; <i64> [#uses=7]
+ %tmp141 = call i64 @llvm.bswap.i64(i64 %tmp17) ; <i64> [#uses=1]
+ %tmp142 = add i64 %tmp125, %tmp112 ; <i64> [#uses=1]
+ %tmp143 = add i64 %tmp142, %tmp138 ; <i64> [#uses=1]
+ %tmp144 = add i64 %tmp141, %tmp99 ; <i64> [#uses=1]
+ %tmp145 = add i64 %tmp144, %tmp143 ; <i64> [#uses=1]
+ %tmp146 = add i64 %tmp145, %tmp138 ; <i64> [#uses=2]
+ %tmp147 = add i64 %tmp127, %tmp114 ; <i64> [#uses=1]
+ %tmp148 = add i64 %tmp140, %tmp147 ; <i64> [#uses=1]
+ %tmp149 = add i64 %tmp127, %tmp114 ; <i64> [#uses=1]
+ %tmp150 = add i64 %tmp148, %tmp149 ; <i64> [#uses=1]
+ %tmp151 = add i64 %tmp146, %tmp101 ; <i64> [#uses=5]
+ %tmp152 = add i64 %tmp140, %tmp150 ; <i64> [#uses=1]
+ %tmp153 = add i64 %tmp152, %tmp146 ; <i64> [#uses=7]
+ %tmp154 = call i64 @llvm.bswap.i64(i64 %tmp18) ; <i64> [#uses=1]
+ %tmp155 = add i64 %tmp138, %tmp125 ; <i64> [#uses=1]
+ %tmp156 = add i64 %tmp155, %tmp151 ; <i64> [#uses=1]
+ %tmp157 = add i64 %tmp154, %tmp112 ; <i64> [#uses=1]
+ %tmp158 = add i64 %tmp157, %tmp156 ; <i64> [#uses=1]
+ %tmp159 = add i64 %tmp158, %tmp151 ; <i64> [#uses=2]
+ %tmp160 = add i64 %tmp140, %tmp127 ; <i64> [#uses=1]
+ %tmp161 = add i64 %tmp153, %tmp160 ; <i64> [#uses=1]
+ %tmp162 = add i64 %tmp140, %tmp127 ; <i64> [#uses=1]
+ %tmp163 = add i64 %tmp161, %tmp162 ; <i64> [#uses=1]
+ %tmp164 = add i64 %tmp159, %tmp114 ; <i64> [#uses=5]
+ %tmp165 = add i64 %tmp153, %tmp163 ; <i64> [#uses=1]
+ %tmp166 = add i64 %tmp165, %tmp159 ; <i64> [#uses=7]
+ %tmp167 = call i64 @llvm.bswap.i64(i64 %tmp19) ; <i64> [#uses=1]
+ %tmp168 = add i64 %tmp151, %tmp138 ; <i64> [#uses=1]
+ %tmp169 = add i64 %tmp168, %tmp164 ; <i64> [#uses=1]
+ %tmp170 = add i64 %tmp167, %tmp125 ; <i64> [#uses=1]
+ %tmp171 = add i64 %tmp170, %tmp169 ; <i64> [#uses=1]
+ %tmp172 = add i64 %tmp171, %tmp164 ; <i64> [#uses=2]
+ %tmp173 = add i64 %tmp153, %tmp140 ; <i64> [#uses=1]
+ %tmp174 = add i64 %tmp166, %tmp173 ; <i64> [#uses=1]
+ %tmp175 = add i64 %tmp153, %tmp140 ; <i64> [#uses=1]
+ %tmp176 = add i64 %tmp174, %tmp175 ; <i64> [#uses=1]
+ %tmp177 = add i64 %tmp172, %tmp127 ; <i64> [#uses=5]
+ %tmp178 = add i64 %tmp166, %tmp176 ; <i64> [#uses=1]
+ %tmp179 = add i64 %tmp178, %tmp172 ; <i64> [#uses=6]
+ %tmp180 = call i64 @llvm.bswap.i64(i64 %tmp20) ; <i64> [#uses=1]
+ %tmp181 = add i64 %tmp164, %tmp151 ; <i64> [#uses=1]
+ %tmp182 = add i64 %tmp181, %tmp177 ; <i64> [#uses=1]
+ %tmp183 = add i64 %tmp180, %tmp138 ; <i64> [#uses=1]
+ %tmp184 = add i64 %tmp183, %tmp182 ; <i64> [#uses=1]
+ %tmp185 = add i64 %tmp184, %tmp177 ; <i64> [#uses=2]
+ %tmp186 = add i64 %tmp166, %tmp153 ; <i64> [#uses=1]
+ %tmp187 = add i64 %tmp179, %tmp186 ; <i64> [#uses=1]
+ %tmp188 = add i64 %tmp166, %tmp153 ; <i64> [#uses=1]
+ %tmp189 = add i64 %tmp187, %tmp188 ; <i64> [#uses=1]
+ %tmp190 = add i64 %tmp185, %tmp140 ; <i64> [#uses=4]
+ %tmp191 = add i64 %tmp179, %tmp189 ; <i64> [#uses=1]
+ %tmp192 = add i64 %tmp191, %tmp185 ; <i64> [#uses=4]
+ %tmp193 = call i64 @llvm.bswap.i64(i64 %tmp21) ; <i64> [#uses=1]
+ %tmp194 = add i64 %tmp177, %tmp164 ; <i64> [#uses=1]
+ %tmp195 = add i64 %tmp194, %tmp190 ; <i64> [#uses=1]
+ %tmp196 = add i64 %tmp193, %tmp151 ; <i64> [#uses=1]
+ %tmp197 = add i64 %tmp196, %tmp195 ; <i64> [#uses=1]
+ %tmp198 = add i64 %tmp197, %tmp190 ; <i64> [#uses=2]
+ %tmp199 = add i64 %tmp179, %tmp166 ; <i64> [#uses=1]
+ %tmp200 = add i64 %tmp192, %tmp199 ; <i64> [#uses=1]
+ %tmp201 = add i64 %tmp179, %tmp166 ; <i64> [#uses=1]
+ %tmp202 = add i64 %tmp200, %tmp201 ; <i64> [#uses=1]
+ %tmp203 = add i64 %tmp198, %tmp153 ; <i64> [#uses=3]
+ %tmp204 = add i64 %tmp192, %tmp202 ; <i64> [#uses=1]
+ %tmp205 = add i64 %tmp204, %tmp198 ; <i64> [#uses=2]
+ %tmp206 = call i64 @llvm.bswap.i64(i64 %tmp22) ; <i64> [#uses=1]
+ %tmp207 = add i64 %tmp190, %tmp177 ; <i64> [#uses=1]
+ %tmp208 = add i64 %tmp207, %tmp203 ; <i64> [#uses=1]
+ %tmp209 = add i64 %tmp206, %tmp164 ; <i64> [#uses=1]
+ %tmp210 = add i64 %tmp209, %tmp208 ; <i64> [#uses=1]
+ %tmp211 = add i64 %tmp210, %tmp203 ; <i64> [#uses=2]
+ %tmp212 = add i64 %tmp192, %tmp179 ; <i64> [#uses=1]
+ %tmp213 = add i64 %tmp205, %tmp212 ; <i64> [#uses=1]
+ %tmp214 = add i64 %tmp192, %tmp179 ; <i64> [#uses=1]
+ %tmp215 = add i64 %tmp213, %tmp214 ; <i64> [#uses=1]
+ %tmp216 = add i64 %tmp211, %tmp166 ; <i64> [#uses=2]
+ %tmp217 = add i64 %tmp205, %tmp215 ; <i64> [#uses=1]
+ %tmp218 = add i64 %tmp217, %tmp211 ; <i64> [#uses=1]
+ %tmp219 = call i64 @llvm.bswap.i64(i64 %tmp23) ; <i64> [#uses=2] last bswap; used by the store and by %tmp222
+ store volatile i64 %tmp219, i64* @X, align 8 ; the only store in the function
+ %tmp220 = add i64 %tmp203, %tmp190 ; <i64> [#uses=1]
+ %tmp221 = add i64 %tmp220, %tmp216 ; <i64> [#uses=1]
+ %tmp222 = add i64 %tmp219, %tmp177 ; <i64> [#uses=1]
+ %tmp223 = add i64 %tmp222, %tmp221 ; <i64> [#uses=1]
+ %tmp224 = add i64 %tmp223, %tmp216 ; <i64> [#uses=1]
+ %tmp225 = add i64 %tmp224, %tmp218 ; <i64> [#uses=1] combines both add chains into the return value
+ ret i64 %tmp225
+}
+
+declare i64 @llvm.bswap.i64(i64) nounwind readnone
diff --git a/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll b/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll
new file mode 100644
index 0000000000..ad18a0c5b9
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2009-04-16-SpillerUnfold.ll
@@ -0,0 +1,141 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats 2>&1 | grep "Number of modref unfolded"
+; XFAIL: *
+; XFAILed because r69408 removed the opportunity for this optimization to work.
+
+ %struct.SHA512_CTX = type { [8 x i64], i64, i64, %struct.anon, i32, i32 }
+ %struct.anon = type { [16 x i64] }
+@K512 = external constant [80 x i64], align 32 ; <[80 x i64]*> [#uses=2]
+
+define fastcc void @sha512_block_data_order(%struct.SHA512_CTX* nocapture %ctx, i8* nocapture %in, i64 %num) nounwind ssp { ; %ctx, %in and %num are never referenced; the body is one endless loop with no ret
+entry:
+ br label %bb349
+
+bb349: ; preds = %bb349, %entry
+ %e.0489 = phi i64 [ 0, %entry ], [ %e.0, %bb349 ] ; <i64> [#uses=3] loop-carried: takes %e.0 from the previous iteration
+ %b.0472 = phi i64 [ 0, %entry ], [ %87, %bb349 ] ; <i64> [#uses=2] loop-carried: takes %87 from the previous iteration
+ %asmtmp356 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %e.0489) nounwind ; <i64> [#uses=1] inline-asm rotate right by 41
+ %0 = xor i64 0, %asmtmp356 ; <i64> [#uses=1]
+ %1 = add i64 0, %0 ; <i64> [#uses=1]
+ %2 = add i64 %1, 0 ; <i64> [#uses=1]
+ %3 = add i64 %2, 0 ; <i64> [#uses=1]
+ %4 = add i64 %3, 0 ; <i64> [#uses=5]
+ %asmtmp372 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %4) nounwind ; <i64> [#uses=1]
+ %asmtmp373 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %4) nounwind ; <i64> [#uses=0]
+ %5 = xor i64 %asmtmp372, 0 ; <i64> [#uses=0]
+ %6 = xor i64 0, %b.0472 ; <i64> [#uses=1]
+ %7 = and i64 %4, %6 ; <i64> [#uses=1]
+ %8 = xor i64 %7, 0 ; <i64> [#uses=1]
+ %9 = add i64 0, %8 ; <i64> [#uses=1]
+ %10 = add i64 %9, 0 ; <i64> [#uses=2]
+ %asmtmp377 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 0) nounwind ; <i64> [#uses=1]
+ %11 = xor i64 0, %asmtmp377 ; <i64> [#uses=1]
+ %12 = add i64 0, %11 ; <i64> [#uses=1]
+ %13 = add i64 %12, 0 ; <i64> [#uses=1]
+ %not381 = xor i64 0, -1 ; <i64> [#uses=1]
+ %14 = and i64 %e.0489, %not381 ; <i64> [#uses=1]
+ %15 = xor i64 0, %14 ; <i64> [#uses=1]
+ %16 = add i64 %15, 0 ; <i64> [#uses=1]
+ %17 = add i64 %16, %13 ; <i64> [#uses=1]
+ %18 = add i64 %17, 0 ; <i64> [#uses=1]
+ %19 = add i64 %18, 0 ; <i64> [#uses=2]
+ %20 = add i64 %19, %b.0472 ; <i64> [#uses=3]
+ %21 = add i64 %19, 0 ; <i64> [#uses=1]
+ %22 = add i64 %21, 0 ; <i64> [#uses=1]
+ %23 = add i32 0, 12 ; <i32> [#uses=1]
+ %24 = and i32 %23, 12 ; <i32> [#uses=1]
+ %25 = zext i32 %24 to i64 ; <i64> [#uses=1]
+ %26 = getelementptr [16 x i64]* null, i64 0, i64 %25 ; <i64*> [#uses=0]
+ %27 = add i64 0, %e.0489 ; <i64> [#uses=1]
+ %28 = add i64 %27, 0 ; <i64> [#uses=1]
+ %29 = add i64 %28, 0 ; <i64> [#uses=1]
+ %30 = add i64 %29, 0 ; <i64> [#uses=2]
+ %31 = and i64 %10, %4 ; <i64> [#uses=1]
+ %32 = xor i64 0, %31 ; <i64> [#uses=1]
+ %33 = add i64 %30, 0 ; <i64> [#uses=3]
+ %34 = add i64 %30, %32 ; <i64> [#uses=1]
+ %35 = add i64 %34, 0 ; <i64> [#uses=1]
+ %36 = and i64 %33, %20 ; <i64> [#uses=1]
+ %37 = xor i64 %36, 0 ; <i64> [#uses=1]
+ %38 = add i64 %37, 0 ; <i64> [#uses=1]
+ %39 = add i64 %38, 0 ; <i64> [#uses=1]
+ %40 = add i64 %39, 0 ; <i64> [#uses=1]
+ %41 = add i64 %40, 0 ; <i64> [#uses=1]
+ %42 = add i64 %41, %4 ; <i64> [#uses=3]
+ %43 = or i32 0, 6 ; <i32> [#uses=1]
+ %44 = and i32 %43, 14 ; <i32> [#uses=1]
+ %45 = zext i32 %44 to i64 ; <i64> [#uses=1]
+ %46 = getelementptr [16 x i64]* null, i64 0, i64 %45 ; <i64*> [#uses=1]
+ %not417 = xor i64 %42, -1 ; <i64> [#uses=1]
+ %47 = and i64 %20, %not417 ; <i64> [#uses=1]
+ %48 = xor i64 0, %47 ; <i64> [#uses=1]
+ %49 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
+ %50 = load i64* %49, align 8 ; <i64> [#uses=1]
+ %51 = add i64 %48, 0 ; <i64> [#uses=1]
+ %52 = add i64 %51, 0 ; <i64> [#uses=1]
+ %53 = add i64 %52, 0 ; <i64> [#uses=1]
+ %54 = add i64 %53, %50 ; <i64> [#uses=2]
+ %asmtmp420 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 0) nounwind ; <i64> [#uses=1]
+ %asmtmp421 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 0) nounwind ; <i64> [#uses=1]
+ %55 = xor i64 %asmtmp420, 0 ; <i64> [#uses=1]
+ %56 = xor i64 %55, %asmtmp421 ; <i64> [#uses=1]
+ %57 = add i64 %54, %10 ; <i64> [#uses=5]
+ %58 = add i64 %54, 0 ; <i64> [#uses=1]
+ %59 = add i64 %58, %56 ; <i64> [#uses=2]
+ %60 = or i32 0, 7 ; <i32> [#uses=1]
+ %61 = and i32 %60, 15 ; <i32> [#uses=1]
+ %62 = zext i32 %61 to i64 ; <i64> [#uses=1]
+ %63 = getelementptr [16 x i64]* null, i64 0, i64 %62 ; <i64*> [#uses=2] loaded at %93 and stored to below
+ %64 = load i64* null, align 8 ; <i64> [#uses=1]
+ %65 = lshr i64 %64, 6 ; <i64> [#uses=1]
+ %66 = xor i64 0, %65 ; <i64> [#uses=1]
+ %67 = xor i64 %66, 0 ; <i64> [#uses=1]
+ %68 = load i64* %46, align 8 ; <i64> [#uses=1]
+ %69 = load i64* null, align 8 ; <i64> [#uses=1]
+ %70 = add i64 %68, 0 ; <i64> [#uses=1]
+ %71 = add i64 %70, %67 ; <i64> [#uses=1]
+ %72 = add i64 %71, %69 ; <i64> [#uses=1]
+ %asmtmp427 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 18, i64 %57) nounwind ; <i64> [#uses=1]
+ %asmtmp428 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 41, i64 %57) nounwind ; <i64> [#uses=1]
+ %73 = xor i64 %asmtmp427, 0 ; <i64> [#uses=1]
+ %74 = xor i64 %73, %asmtmp428 ; <i64> [#uses=1]
+ %75 = and i64 %57, %42 ; <i64> [#uses=1]
+ %not429 = xor i64 %57, -1 ; <i64> [#uses=1]
+ %76 = and i64 %33, %not429 ; <i64> [#uses=1]
+ %77 = xor i64 %75, %76 ; <i64> [#uses=1]
+ %78 = getelementptr [80 x i64]* @K512, i64 0, i64 0 ; <i64*> [#uses=1]
+ %79 = load i64* %78, align 16 ; <i64> [#uses=1]
+ %80 = add i64 %77, %20 ; <i64> [#uses=1]
+ %81 = add i64 %80, %72 ; <i64> [#uses=1]
+ %82 = add i64 %81, %74 ; <i64> [#uses=1]
+ %83 = add i64 %82, %79 ; <i64> [#uses=1]
+ %asmtmp432 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 34, i64 %59) nounwind ; <i64> [#uses=1]
+ %asmtmp433 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 39, i64 %59) nounwind ; <i64> [#uses=1]
+ %84 = xor i64 %asmtmp432, 0 ; <i64> [#uses=1]
+ %85 = xor i64 %84, %asmtmp433 ; <i64> [#uses=1]
+ %86 = add i64 %83, %22 ; <i64> [#uses=2]
+ %87 = add i64 0, %85 ; <i64> [#uses=1] feeds the %b.0472 phi on the next iteration
+ %asmtmp435 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 8, i64 0) nounwind ; <i64> [#uses=1]
+ %88 = xor i64 0, %asmtmp435 ; <i64> [#uses=1]
+ %89 = load i64* null, align 8 ; <i64> [#uses=3]
+ %asmtmp436 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 19, i64 %89) nounwind ; <i64> [#uses=1]
+ %asmtmp437 = call i64 asm "rorq $1,$0", "=r,J,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 61, i64 %89) nounwind ; <i64> [#uses=1]
+ %90 = lshr i64 %89, 6 ; <i64> [#uses=1]
+ %91 = xor i64 %asmtmp436, %90 ; <i64> [#uses=1]
+ %92 = xor i64 %91, %asmtmp437 ; <i64> [#uses=1]
+ %93 = load i64* %63, align 8 ; <i64> [#uses=1]
+ %94 = load i64* null, align 8 ; <i64> [#uses=1]
+ %95 = add i64 %93, %88 ; <i64> [#uses=1]
+ %96 = add i64 %95, %92 ; <i64> [#uses=1]
+ %97 = add i64 %96, %94 ; <i64> [#uses=2]
+ store i64 %97, i64* %63, align 8 ; the only store in the function; writes back the slot read into %93
+ %98 = and i64 %86, %57 ; <i64> [#uses=1]
+ %not441 = xor i64 %86, -1 ; <i64> [#uses=1]
+ %99 = and i64 %42, %not441 ; <i64> [#uses=1]
+ %100 = xor i64 %98, %99 ; <i64> [#uses=1]
+ %101 = add i64 %100, %33 ; <i64> [#uses=1]
+ %102 = add i64 %101, %97 ; <i64> [#uses=1]
+ %103 = add i64 %102, 0 ; <i64> [#uses=1]
+ %104 = add i64 %103, 0 ; <i64> [#uses=1]
+ %e.0 = add i64 %104, %35 ; <i64> [#uses=1] feeds the %e.0489 phi on the next iteration
+ br label %bb349
+}
diff --git a/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll b/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll
new file mode 100644
index 0000000000..eb4a5c04a2
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2010-01-19-OptExtBug.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin11 -relocation-model=pic -disable-fp-elim -stats 2>&1 | not grep ext-opt
+
+define fastcc i8* @S_scan_str(i8* %start, i32 %keep_quoted, i32 %keep_delims) nounwind ssp { ; no argument is referenced and every path ends in unreachable
+entry:
+ switch i8 undef, label %bb6 [
+ i8 9, label %bb5 ; 9 is the ASCII code for TAB
+ i8 32, label %bb5 ; 32 is the ASCII code for space
+ i8 10, label %bb5 ; 10 is the ASCII code for LF
+ i8 13, label %bb5 ; 13 is the ASCII code for CR
+ i8 12, label %bb5 ; 12 is the ASCII code for FF
+ ]
+
+bb5: ; preds = %entry, %entry, %entry, %entry, %entry
+ br label %bb6
+
+bb6: ; preds = %bb5, %entry
+ br i1 undef, label %bb7, label %bb9
+
+bb7: ; preds = %bb6
+ unreachable
+
+bb9: ; preds = %bb6
+ %0 = load i8* undef, align 1 ; <i8> [#uses=3] used by the store and zext in %bb13 and the icmp in %bb18
+ br i1 undef, label %bb12, label %bb10
+
+bb10: ; preds = %bb9
+ br i1 undef, label %bb12, label %bb11
+
+bb11: ; preds = %bb10
+ unreachable
+
+bb12: ; preds = %bb10, %bb9
+ br i1 undef, label %bb13, label %bb14
+
+bb13: ; preds = %bb12
+ store i8 %0, i8* undef, align 1
+ %1 = zext i8 %0 to i32 ; <i32> [#uses=1] NOTE(review): presumably the extension the RUN line's "not grep ext-opt" is about; confirm
+ br label %bb18
+
+bb14: ; preds = %bb12
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb13
+ %termcode.0 = phi i32 [ %1, %bb13 ], [ undef, %bb14 ] ; <i32> [#uses=2]
+ %2 = icmp eq i8 %0, 0 ; <i1> [#uses=1] the only branch condition in the function that is not undef
+ br i1 %2, label %bb21, label %bb19
+
+bb19: ; preds = %bb18
+ br i1 undef, label %bb21, label %bb20
+
+bb20: ; preds = %bb19
+ br label %bb21
+
+bb21: ; preds = %bb20, %bb19, %bb18
+ %termcode.1 = phi i32 [ %termcode.0, %bb18 ], [ %termcode.0, %bb19 ], [ undef, %bb20 ] ; <i32> [#uses=0]
+ unreachable
+}
diff --git a/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll
new file mode 100644
index 0000000000..47ef693cc2
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2011-06-12-FastAllocSpill.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats 2>&1 | FileCheck %s
+;
+; This test should not cause any spilling with RAFast.
+;
+; CHECK: Number of copies coalesced
+; CHECK-NOT: Number of stores added
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type { i64, i64, i8*, i8* }
+%1 = type opaque
+%2 = type opaque
+%3 = type <{ i8*, i32, i32, void (%4*)*, i8*, i64 }>
+%4 = type { i8**, i32, i32, i8**, %5*, i64 }
+%5 = type { i64, i64 }
+%6 = type { i8*, i32, i32, i8*, %5* }
+
+@0 = external hidden constant %0
+
+define hidden void @f() ssp { ; runs the %bb8 body 10 times, with the counter kept in the stack slot %tmp5
+bb:
+ %tmp5 = alloca i64, align 8 ; loop counter slot
+ %tmp6 = alloca void ()*, align 8 ; slot holding the address of %tmp7, typed as void ()*
+ %tmp7 = alloca %3, align 8 ; stack object of packed struct type %3
+ store i64 0, i64* %tmp5, align 8 ; counter starts at 0
+ br label %bb8
+
+bb8: ; preds = %bb23, %bb
+ %tmp15 = getelementptr inbounds %3* %tmp7, i32 0, i32 4 ; address of field 4 (i8*) of %tmp7
+ store i8* bitcast (%0* @0 to i8*), i8** %tmp15 ; field 4 = address of the external constant @0
+ %tmp16 = bitcast %3* %tmp7 to void ()*
+ store void ()* %tmp16, void ()** %tmp6, align 8 ; round-trip the object pointer through the %tmp6 slot
+ %tmp17 = load void ()** %tmp6, align 8
+ %tmp18 = bitcast void ()* %tmp17 to %6* ; view the same object as type %6
+ %tmp19 = getelementptr inbounds %6* %tmp18, i32 0, i32 3 ; address of field 3 (i8*) in the %6 view
+ %tmp20 = bitcast %6* %tmp18 to i8*
+ %tmp21 = load i8** %tmp19 ; callee pointer read from field 3
+ %tmp22 = bitcast i8* %tmp21 to void (i8*)*
+ call void %tmp22(i8* %tmp20) ; indirect call, passing the object itself as the argument
+ br label %bb23
+
+bb23: ; preds = %bb8
+ %tmp24 = load i64* %tmp5, align 8
+ %tmp25 = add i64 %tmp24, 1 ; counter + 1
+ store i64 %tmp25, i64* %tmp5, align 8
+ %tmp26 = icmp ult i64 %tmp25, 10 ; keep looping while the incremented counter is below 10
+ br i1 %tmp26, label %bb8, label %bb27
+
+bb27: ; preds = %bb23
+ ret void
+}
diff --git a/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll b/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll
new file mode 100644
index 0000000000..18a3313773
--- /dev/null
+++ b/test/CodeGen/X86/Stats/2012-03-26-PostRALICMBug.ll
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -stats 2>&1 | \
+; RUN: not grep "Number of machine instructions hoisted out of loops post regalloc"
+
+; rdar://11095580
+
+%struct.ref_s = type { %union.color_sample, i16, i16 }
+%union.color_sample = type { i64 }
+
+@table = external global [3891 x i64]
+
+declare i32 @foo()
+
+define i32 @zarray(%struct.ref_s* nocapture %op) nounwind ssp { ; returns %call when %return is reached from %entry, otherwise 0 (see the %retval.0 phi)
+entry:
+ %call = tail call i32 @foo()
+ %tmp = ashr i32 %call, 31 ; sign mask: -1 if %call is negative, else 0
+ %0 = and i32 %tmp, 1396 ; 1396 if %call is negative, else 0
+ %index9 = add i32 %0, 2397 ; running index into @table, built up across the blocks below
+ indirectbr i8* undef, [label %return, label %if.end]
+
+if.end: ; preds = %entry
+ %size5 = getelementptr inbounds %struct.ref_s* %op, i64 0, i32 2 ; address of field 2 (i16) of %op
+ %tmp6 = load i16* %size5, align 2
+ %tobool1 = icmp eq i16 %tmp6, 0
+ %1 = select i1 %tobool1, i32 1396, i32 -1910
+ %index10 = add i32 %index9, %1
+ indirectbr i8* undef, [label %return, label %while.body.lr.ph]
+
+while.body.lr.ph: ; preds = %if.end
+ %refs = bitcast %struct.ref_s* %op to %struct.ref_s**
+ %tmp9 = load %struct.ref_s** %refs, align 8 ; first 8 bytes of %op read as a %struct.ref_s pointer
+ %tmp4 = zext i16 %tmp6 to i64 ; loop trip bound
+ %index13 = add i32 %index10, 1658 ; seeds the %index7 phi in the loop
+ %2 = sext i32 %index13 to i64
+ %3 = getelementptr [3891 x i64]* @table, i64 0, i64 %2
+ %blockaddress14 = load i64* %3, align 8 ; branch target fetched from @table
+ %4 = inttoptr i64 %blockaddress14 to i8*
+ indirectbr i8* %4, [label %while.body]
+
+while.body: ; preds = %while.body, %while.body.lr.ph
+ %index7 = phi i32 [ %index15, %while.body ], [ %index13, %while.body.lr.ph ]
+ %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.lr.ph ]
+ %type_attrs = getelementptr %struct.ref_s* %tmp9, i64 %indvar, i32 1 ; field 1 (i16) of element %indvar
+ store i16 32, i16* %type_attrs, align 2
+ %indvar.next = add i64 %indvar, 1
+ %exitcond5 = icmp eq i64 %indvar.next, %tmp4
+ %tmp7 = select i1 %exitcond5, i32 1648, i32 0 ; index advances by 1648 only on the final iteration
+ %index15 = add i32 %index7, %tmp7
+ %tmp8 = select i1 %exitcond5, i64 13, i64 0 ; result is not used anywhere in this function
+ %5 = sext i32 %index15 to i64
+ %6 = getelementptr [3891 x i64]* @table, i64 0, i64 %5
+ %blockaddress16 = load i64* %6, align 8 ; table load inside the loop; its index %index15 changes with %exitcond5
+ %7 = inttoptr i64 %blockaddress16 to i8*
+ indirectbr i8* %7, [label %return, label %while.body]
+
+return: ; preds = %while.body, %if.end, %entry
+ %retval.0 = phi i32 [ %call, %entry ], [ 0, %if.end ], [ 0, %while.body ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll b/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll
new file mode 100644
index 0000000000..33141680aa
--- /dev/null
+++ b/test/CodeGen/X86/Stats/MachineSink-PHIUse.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -disable-cgp-branch-opts -stats 2>&1 | grep "machine-sink"
+
+define fastcc void @t() nounwind ssp { ; CFG skeleton: every conditional branch tests undef
+entry:
+ br i1 undef, label %bb, label %bb4
+
+bb: ; preds = %entry
+ br i1 undef, label %return, label %bb3
+
+bb3: ; preds = %bb
+ unreachable
+
+bb4: ; preds = %entry
+ br i1 undef, label %bb.nph, label %return
+
+bb.nph: ; preds = %bb4
+ br label %bb5
+
+bb5: ; preds = %bb9, %bb.nph
+ %indvar = phi i64 [ 0, %bb.nph ], [ %tmp12, %bb9 ] ; <i64> [#uses=1] loop counter, incoming from the %bb9 back-edge
+ %tmp12 = add i64 %indvar, 1 ; <i64> [#uses=2] used by the phi above and the trunc below
+ %tmp13 = trunc i64 %tmp12 to i32 ; <i32> [#uses=0] dead truncation of the incremented counter
+ br i1 undef, label %bb9, label %bb6
+
+bb6: ; preds = %bb5
+ br i1 undef, label %bb9, label %bb7
+
+bb7: ; preds = %bb6
+ br i1 undef, label %bb9, label %bb8
+
+bb8: ; preds = %bb7
+ unreachable
+
+bb9: ; preds = %bb7, %bb6, %bb5
+ br i1 undef, label %bb5, label %return
+
+return: ; preds = %bb9, %bb4, %bb
+ ret void
+}
diff --git a/test/CodeGen/X86/Stats/constant-pool-remat-0.ll b/test/CodeGen/X86/Stats/constant-pool-remat-0.ll
new file mode 100644
index 0000000000..4be14d2128
--- /dev/null
+++ b/test/CodeGen/X86/Stats/constant-pool-remat-0.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux -regalloc=greedy | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; CHECK: LCPI
+; CHECK: LCPI
+; CHECK: LCPI
+; CHECK-NOT: LCPI
+
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X64stat
+; X64stat: 6 asm-printer
+
+; RUN: llc < %s -march=x86 -mattr=+sse2 -o /dev/null -stats -info-output-file - | FileCheck %s -check-prefix=X32stat
+; X32stat: 12 asm-printer
+
+declare float @qux(float %y)
+
+define float @array(float %a) nounwind { ; computes qux(%a * 9.0) * 9.0
+ %n = fmul float %a, 9.0 ; first use of the constant 9.0
+ %m = call float @qux(float %n)
+ %o = fmul float %m, 9.0 ; the same constant is needed again after the call to @qux
+ ret float %o
+}
diff --git a/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll b/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll
new file mode 100644
index 0000000000..064ee364d1
--- /dev/null
+++ b/test/CodeGen/X86/Stats/convert-2-addr-3-addr-inc64.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s -mtriple=x86_64-linux -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
+; RUN: llc < %s -mtriple=x86_64-win32 -o /dev/null -stats 2>&1 | FileCheck %s -check-prefix=STATS
+; STATS: 9 asm-printer
+
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
+; CHECK: leal 1({{%rsi|%rdx}}),
+
+define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2, i8* %ptr) nounwind optsize { ; %i1 is never referenced
+entry:
+ %0 = add i32 %i2, 1 ; <i32> [#uses=1] NOTE(review): presumably the add matched by the "leal 1(...)" CHECK line; confirm
+ %1 = sext i32 %0 to i64 ; <i64> [#uses=1]
+ %2 = getelementptr i8* %ptr, i64 %1 ; <i8*> [#uses=1] address of %ptr[%i2 + 1]
+ %3 = load i8* %2, align 1 ; <i8> [#uses=1]
+ %4 = icmp eq i8 0, %3 ; <i1> [#uses=1] is the loaded byte zero?
+ br i1 %4, label %bb3, label %bb34
+
+bb3: ; preds = %entry
+ %5 = add i32 %i2, 4 ; <i32> [#uses=1]
+ %6 = trunc i32 %5 to i8 ; <i8> [#uses=1] returns the low 8 bits of %i2 + 4
+ ret i8 %6
+
+bb34: ; preds = %entry
+ ret i8 0
+}
+
diff --git a/test/CodeGen/X86/Stats/dagcombine-cse.ll b/test/CodeGen/X86/Stats/dagcombine-cse.ll
new file mode 100644
index 0000000000..af69531246
--- /dev/null
+++ b/test/CodeGen/X86/Stats/dagcombine-cse.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
+
+define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind { ; packs a 32-bit and a 16-bit load into one i64, routes it through vector types, returns one i32 lane
+entry:
+ %tmp7 = mul i32 %idxY, %ref_frame_stride ; <i32> [#uses=2] row offset, shared by both address computations
+ %tmp9 = add i32 %tmp7, %idxX ; <i32> [#uses=1]
+ %tmp11 = getelementptr i8* %ref_frame_ptr, i32 %tmp9 ; <i8*> [#uses=1]
+ %tmp1112 = bitcast i8* %tmp11 to i32* ; <i32*> [#uses=1]
+ %tmp13 = load i32* %tmp1112, align 4 ; <i32> [#uses=1] 32-bit load at byte offset %tmp7 + %idxX
+ %tmp18 = add i32 %idxX, 4 ; <i32> [#uses=1]
+ %tmp20.sum = add i32 %tmp18, %tmp7 ; <i32> [#uses=1] same offset as %tmp9 plus 4, with the operands in a different order
+ %tmp21 = getelementptr i8* %ref_frame_ptr, i32 %tmp20.sum ; <i8*> [#uses=1]
+ %tmp2122 = bitcast i8* %tmp21 to i16* ; <i16*> [#uses=1]
+ %tmp23 = load i16* %tmp2122, align 2 ; <i16> [#uses=1] 16-bit load from the 4 bytes-later address
+ %tmp2425 = zext i16 %tmp23 to i64 ; <i64> [#uses=1]
+ %tmp26 = shl i64 %tmp2425, 32 ; <i64> [#uses=1] 16-bit value moved to bits 32-47
+ %tmp2728 = zext i32 %tmp13 to i64 ; <i64> [#uses=1]
+ %tmp29 = or i64 %tmp26, %tmp2728 ; <i64> [#uses=1] 32-bit value in bits 0-31, 16-bit value in bits 32-47
+ %tmp3454 = bitcast i64 %tmp29 to double ; <double> [#uses=1]
+ %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0 ; <<2 x double>> [#uses=1]
+ %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1 ; <<2 x double>> [#uses=1]
+ %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 > ; <<8 x i16>> [#uses=1] i16 lanes 0 and 1 keep their positions
+ %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32> ; <<4 x i32>> [#uses=1]
+ %tmp48 = extractelement <4 x i32> %tmp47, i32 0 ; <i32> [#uses=1] i32 lane 0 is made of i16 lanes 0 and 1
+ ret i32 %tmp48
+}
diff --git a/test/CodeGen/X86/Stats/hoist-invariant-load.ll b/test/CodeGen/X86/Stats/hoist-invariant-load.ll
new file mode 100644
index 0000000000..74ecd045b3
--- /dev/null
+++ b/test/CodeGen/X86/Stats/hoist-invariant-load.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -stats -O2 2>&1 | grep "1 machine-licm"
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.7.2"
+
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [4 x i8] c"foo\00", section "__TEXT,__objc_methname,cstring_literals", align 1
+@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([4 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+define void @test(i8* %x) uwtable ssp { ; loop that calls objc_msgSend on %x with a selector that is loaded inside the loop body
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %0 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8, !invariant.load !0 ; tagged !invariant.load; the test's command line greps the -stats output for "1 machine-licm"
+ %call = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %x, i8* %0)
+ %inc = add i32 %i.01, 1
+ %exitcond = icmp eq i32 %inc, 10000 ; loop leaves once the counter reaches 10000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
+
+!0 = metadata !{}
diff --git a/test/CodeGen/X86/Stats/licm-nested.ll b/test/CodeGen/X86/Stats/licm-nested.ll
new file mode 100644
index 0000000000..c3f991d7a9
--- /dev/null
+++ b/test/CodeGen/X86/Stats/licm-nested.ll
@@ -0,0 +1,89 @@
+; RUN: llc -mtriple=x86_64-apple-darwin -march=x86-64 < %s -o /dev/null -stats -info-output-file - | grep "hoisted out of loops" | grep 3
+
+; MachineLICM should be able to hoist the symbolic addresses out of
+; the inner loops.
+
+@main.flags = internal global [8193 x i8] zeroinitializer, align 16 ; <[8193 x i8]*> [#uses=3]
+@.str = private constant [11 x i8] c"Count: %d\0A\00" ; <[11 x i8]*> [#uses=1]
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { ; nested loops over @main.flags; each inner loop forms an address based on the global
+entry:
+ %cmp = icmp eq i32 %argc, 2 ; <i1> [#uses=1]
+ br i1 %cmp, label %while.cond.preheader, label %bb.nph53
+
+while.cond.preheader: ; preds = %entry
+ %arrayidx = getelementptr inbounds i8** %argv, i64 1 ; <i8**> [#uses=1]
+ %tmp2 = load i8** %arrayidx ; <i8*> [#uses=1]
+ %call = tail call i32 @atoi(i8* %tmp2) nounwind ; <i32> [#uses=2]
+ %tobool51 = icmp eq i32 %call, 0 ; <i1> [#uses=1] -- a parsed value of 0 skips all loops
+ br i1 %tobool51, label %while.end, label %bb.nph53
+
+while.cond.loopexit: ; preds = %for.inc35
+ %indvar.next77 = add i32 %indvar76, 1 ; <i32> [#uses=2]
+ %exitcond78 = icmp eq i32 %indvar.next77, %NUM.0.ph80 ; <i1> [#uses=1]
+ br i1 %exitcond78, label %while.end, label %bb.nph
+
+bb.nph53: ; preds = %entry, %while.cond.preheader
+ %NUM.0.ph80 = phi i32 [ %call, %while.cond.preheader ], [ 17000, %entry ] ; <i32> [#uses=1] -- outer trip count: atoi(argv[1]) when argc == 2, otherwise 17000
+ br label %bb.nph
+
+bb.nph: ; preds = %while.cond.loopexit, %bb.nph53
+ %indvar76 = phi i32 [ 0, %bb.nph53 ], [ %indvar.next77, %while.cond.loopexit ] ; <i32> [#uses=1]
+ br label %for.body
+
+for.body: ; preds = %for.body, %bb.nph
+ %indvar = phi i64 [ 0, %bb.nph ], [ %indvar.next, %for.body ] ; <i64> [#uses=2]
+ %tmp = add i64 %indvar, 2 ; <i64> [#uses=1]
+ %arrayidx10 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp ; <i8*> [#uses=1] -- address into @main.flags formed inside the loop
+ store i8 1, i8* %arrayidx10 ; flags[indvar + 2] = 1
+ %indvar.next = add i64 %indvar, 1 ; <i64> [#uses=2]
+ %exitcond = icmp eq i64 %indvar.next, 8191 ; <i1> [#uses=1] -- 8191 iterations, so indices 2..8192 are written
+ br i1 %exitcond, label %for.body15, label %for.body
+
+for.body15: ; preds = %for.body, %for.inc35
+ %indvar57 = phi i64 [ %indvar.next58, %for.inc35 ], [ 0, %for.body ] ; <i64> [#uses=4]
+ %count.248 = phi i32 [ %count.1, %for.inc35 ], [ 0, %for.body ] ; <i32> [#uses=2]
+ %tmp68 = add i64 %indvar57, 2 ; <i64> [#uses=2] -- index being tested, also the stride of the clearing loop
+ %tmp70 = mul i64 %indvar57, 3 ; <i64> [#uses=1]
+ %tmp71 = add i64 %tmp70, 6 ; <i64> [#uses=1] -- 3 * %tmp68
+ %tmp73 = shl i64 %indvar57, 1 ; <i64> [#uses=1]
+ %add = add i64 %tmp73, 4 ; <i64> [#uses=2] -- 2 * %tmp68, first index the clearing loop touches
+ %arrayidx17 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp68 ; <i8*> [#uses=1]
+ %tmp18 = load i8* %arrayidx17 ; <i8> [#uses=1]
+ %tobool19 = icmp eq i8 %tmp18, 0 ; <i1> [#uses=1] -- a zero flag skips the clearing loop and the count increment
+ br i1 %tobool19, label %for.inc35, label %if.then
+
+if.then: ; preds = %for.body15
+ %cmp2443 = icmp slt i64 %add, 8193 ; <i1> [#uses=1]
+ br i1 %cmp2443, label %for.body25, label %for.end32
+
+for.body25: ; preds = %if.then, %for.body25
+ %indvar55 = phi i64 [ %indvar.next56, %for.body25 ], [ 0, %if.then ] ; <i64> [#uses=2]
+ %tmp60 = mul i64 %tmp68, %indvar55 ; <i64> [#uses=2]
+ %tmp75 = add i64 %add, %tmp60 ; <i64> [#uses=1]
+ %arrayidx27 = getelementptr [8193 x i8]* @main.flags, i64 0, i64 %tmp75 ; <i8*> [#uses=1] -- address into @main.flags formed inside the innermost loop
+ store i8 0, i8* %arrayidx27 ; clears flags[%add + %tmp68 * %indvar55]
+ %add31 = add i64 %tmp71, %tmp60 ; <i64> [#uses=1] -- index the next iteration would clear
+ %cmp24 = icmp slt i64 %add31, 8193 ; <i1> [#uses=1]
+ %indvar.next56 = add i64 %indvar55, 1 ; <i64> [#uses=1]
+ br i1 %cmp24, label %for.body25, label %for.end32
+
+for.end32: ; preds = %for.body25, %if.then
+ %inc34 = add nsw i32 %count.248, 1 ; <i32> [#uses=1] -- counts each index whose flag was nonzero
+ br label %for.inc35
+
+for.inc35: ; preds = %for.body15, %for.end32
+ %count.1 = phi i32 [ %inc34, %for.end32 ], [ %count.248, %for.body15 ] ; <i32> [#uses=2]
+ %indvar.next58 = add i64 %indvar57, 1 ; <i64> [#uses=2]
+ %exitcond67 = icmp eq i64 %indvar.next58, 8191 ; <i1> [#uses=1]
+ br i1 %exitcond67, label %while.cond.loopexit, label %for.body15
+
+while.end: ; preds = %while.cond.loopexit, %while.cond.preheader
+ %count.0.lcssa = phi i32 [ 0, %while.cond.preheader ], [ %count.1, %while.cond.loopexit ] ; <i32> [#uses=1]
+ %call40 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i64 0, i64 0), i32 %count.0.lcssa) nounwind ; <i32> [#uses=0] -- prints the final count using the @.str format
+ ret i32 0
+}
+
+declare i32 @atoi(i8* nocapture) nounwind readonly
+
+declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/Stats/lit.local.cfg b/test/CodeGen/X86/Stats/lit.local.cfg
new file mode 100644
index 0000000000..1a5fd5ec86
--- /dev/null
+++ b/test/CodeGen/X86/Stats/lit.local.cfg
@@ -0,0 +1,8 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
+if not config.root.enable_assertions:
+ config.unsupported = True
diff --git a/test/CodeGen/X86/Stats/phi-immediate-factoring.ll b/test/CodeGen/X86/Stats/phi-immediate-factoring.ll
new file mode 100644
index 0000000000..476bb10998
--- /dev/null
+++ b/test/CodeGen/X86/Stats/phi-immediate-factoring.ll
@@ -0,0 +1,54 @@
+; RUN: llc < %s -march=x86 -stats 2>&1 | grep "Number of blocks eliminated" | grep 6
+; PR1296
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
+target triple = "i686-apple-darwin8"
+
+define i32 @foo(i32 %A, i32 %B, i32 %C) nounwind { ; dispatches on %A into one of three counting loops; every edge into %out supplies the constant 0 or 1
+entry:
+ switch i32 %A, label %out [ ; any %A other than 0, 1 or 2 goes straight to %out
+ i32 1, label %bb
+ i32 0, label %bb13
+ i32 2, label %bb35
+ ]
+
+bb: ; preds = %cond_next, %entry
+ %i.144.1 = phi i32 [ 0, %entry ], [ %tmp7, %cond_next ] ; <i32> [#uses=2]
+ %tmp4 = and i32 %i.144.1, %B ; <i32> [#uses=1]
+ icmp eq i32 %tmp4, 0 ; <i1>:0 [#uses=1] -- unnamed result, referenced as %0
+ br i1 %0, label %cond_next, label %out
+
+cond_next: ; preds = %bb
+ %tmp7 = add i32 %i.144.1, 1 ; <i32> [#uses=2]
+ icmp slt i32 %tmp7, 1000 ; <i1>:1 [#uses=1]
+ br i1 %1, label %bb, label %out
+
+bb13: ; preds = %cond_next18, %entry
+ %i.248.1 = phi i32 [ 0, %entry ], [ %tmp20, %cond_next18 ] ; <i32> [#uses=2]
+ %tmp16 = and i32 %i.248.1, %C ; <i32> [#uses=1]
+ icmp eq i32 %tmp16, 0 ; <i1>:2 [#uses=1]
+ br i1 %2, label %cond_next18, label %out
+
+cond_next18: ; preds = %bb13
+ %tmp20 = add i32 %i.248.1, 1 ; <i32> [#uses=2]
+ icmp slt i32 %tmp20, 1000 ; <i1>:3 [#uses=1]
+ br i1 %3, label %bb13, label %out
+
+bb27: ; preds = %bb35
+ %tmp30 = and i32 %i.3, %C ; <i32> [#uses=1]
+ icmp eq i32 %tmp30, 0 ; <i1>:4 [#uses=1]
+ br i1 %4, label %cond_next32, label %out
+
+cond_next32: ; preds = %bb27
+ %indvar.next = add i32 %i.3, 1 ; <i32> [#uses=1]
+ br label %bb35
+
+bb35: ; preds = %entry, %cond_next32
+ %i.3 = phi i32 [ %indvar.next, %cond_next32 ], [ 0, %entry ] ; <i32> [#uses=3] -- this loop tests its bound at the top, unlike the other two
+ icmp slt i32 %i.3, 1000 ; <i1>:5 [#uses=1]
+ br i1 %5, label %bb27, label %out
+
+out: ; preds = %bb27, %bb35, %bb13, %cond_next18, %bb, %cond_next, %entry
+ %result.0 = phi i32 [ 0, %entry ], [ 1, %bb ], [ 0, %cond_next ], [ 1, %bb13 ], [ 0, %cond_next18 ], [ 1, %bb27 ], [ 0, %bb35 ] ; <i32> [#uses=1] -- 1 when an AND test came back nonzero, 0 otherwise
+ ret i32 %result.0
+}
diff --git a/test/CodeGen/X86/Stats/pr3522.ll b/test/CodeGen/X86/Stats/pr3522.ll
new file mode 100644
index 0000000000..d8f37781fc
--- /dev/null
+++ b/test/CodeGen/X86/Stats/pr3522.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86 -stats 2>&1 | not grep "instructions sunk"
+; PR3522
+
+target triple = "i386-pc-linux-gnu"
+@.str = external constant [13 x i8] ; <[13 x i8]*> [#uses=1]
+
+define void @_ada_c34018a() { ; value computed before an invoke and used only in its landing pad; the test's command line requires that no "instructions sunk" statistic is reported
+entry:
+ %0 = tail call i32 @report__ident_int(i32 90) ; <i32> [#uses=1]
+ %1 = trunc i32 %0 to i8 ; <i8> [#uses=1] -- its only use is the icmp in %lpad
+ invoke void @__gnat_rcheck_12(i8* getelementptr ([13 x i8]* @.str, i32 0, i32 0), i32 32) noreturn
+ to label %invcont unwind label %lpad
+
+invcont: ; preds = %entry
+ unreachable
+
+bb22: ; preds = %lpad
+ ret void
+
+return: ; preds = %lpad
+ ret void
+
+lpad: ; preds = %entry
+ %exn = landingpad {i8*, i32} personality i32 (...)* @__gxx_personality_v0
+ cleanup
+ %2 = icmp eq i8 %1, 90 ; <i1> [#uses=1] -- consumes %1, which was defined in %entry ahead of the invoke
+ br i1 %2, label %return, label %bb22
+}
+
+declare void @__gnat_rcheck_12(i8*, i32) noreturn
+
+declare i32 @report__ident_int(i32)
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/CodeGen/X86/Stats/regpressure.ll b/test/CodeGen/X86/Stats/regpressure.ll
new file mode 100644
index 0000000000..52d7b56f18
--- /dev/null
+++ b/test/CodeGen/X86/Stats/regpressure.ll
@@ -0,0 +1,114 @@
+;; regpressure1 and regpressure2 in this testcase should codegen to the same
+;; function, and none of the functions here should require spilling to the stack.
+
+; RUN: llc < %s -march=x86 -stats 2>&1 | \
+; RUN: not grep "Number of register spills"
+
+;; This can be compiled to use three registers if the loads are not
+;; folded into the multiplies, 2 registers otherwise.
+
+define i32 @regpressure1(i32* %P) { ; product of P[0]..P[9]; each load is followed directly by the multiply that consumes it
+ %A = load i32* %P ; <i32> [#uses=1]
+ %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1]
+ %B = load i32* %Bp ; <i32> [#uses=1]
+ %s1 = mul i32 %A, %B ; <i32> [#uses=1] -- start of the running product
+ %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1]
+ %C = load i32* %Cp ; <i32> [#uses=1]
+ %s2 = mul i32 %s1, %C ; <i32> [#uses=1]
+ %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1]
+ %D = load i32* %Dp ; <i32> [#uses=1]
+ %s3 = mul i32 %s2, %D ; <i32> [#uses=1]
+ %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1]
+ %E = load i32* %Ep ; <i32> [#uses=1]
+ %s4 = mul i32 %s3, %E ; <i32> [#uses=1]
+ %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1]
+ %F = load i32* %Fp ; <i32> [#uses=1]
+ %s5 = mul i32 %s4, %F ; <i32> [#uses=1]
+ %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1]
+ %G = load i32* %Gp ; <i32> [#uses=1]
+ %s6 = mul i32 %s5, %G ; <i32> [#uses=1]
+ %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1]
+ %H = load i32* %Hp ; <i32> [#uses=1]
+ %s7 = mul i32 %s6, %H ; <i32> [#uses=1]
+ %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1]
+ %I = load i32* %Ip ; <i32> [#uses=1]
+ %s8 = mul i32 %s7, %I ; <i32> [#uses=1]
+ %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1]
+ %J = load i32* %Jp ; <i32> [#uses=1]
+ %s9 = mul i32 %s8, %J ; <i32> [#uses=1] -- final product of all ten elements
+ ret i32 %s9
+}
+
+define i32 @regpressure2(i32* %P) { ; same product of P[0]..P[9] as @regpressure1, but all ten loads come before the multiply chain
+ %A = load i32* %P ; <i32> [#uses=1]
+ %Bp = getelementptr i32* %P, i32 1 ; <i32*> [#uses=1]
+ %B = load i32* %Bp ; <i32> [#uses=1]
+ %Cp = getelementptr i32* %P, i32 2 ; <i32*> [#uses=1]
+ %C = load i32* %Cp ; <i32> [#uses=1]
+ %Dp = getelementptr i32* %P, i32 3 ; <i32*> [#uses=1]
+ %D = load i32* %Dp ; <i32> [#uses=1]
+ %Ep = getelementptr i32* %P, i32 4 ; <i32*> [#uses=1]
+ %E = load i32* %Ep ; <i32> [#uses=1]
+ %Fp = getelementptr i32* %P, i32 5 ; <i32*> [#uses=1]
+ %F = load i32* %Fp ; <i32> [#uses=1]
+ %Gp = getelementptr i32* %P, i32 6 ; <i32*> [#uses=1]
+ %G = load i32* %Gp ; <i32> [#uses=1]
+ %Hp = getelementptr i32* %P, i32 7 ; <i32*> [#uses=1]
+ %H = load i32* %Hp ; <i32> [#uses=1]
+ %Ip = getelementptr i32* %P, i32 8 ; <i32*> [#uses=1]
+ %I = load i32* %Ip ; <i32> [#uses=1]
+ %Jp = getelementptr i32* %P, i32 9 ; <i32*> [#uses=1]
+ %J = load i32* %Jp ; <i32> [#uses=1] -- last of the ten loads; as written, all loaded values are live here
+ %s1 = mul i32 %A, %B ; <i32> [#uses=1] -- multiply chain starts only after every load
+ %s2 = mul i32 %s1, %C ; <i32> [#uses=1]
+ %s3 = mul i32 %s2, %D ; <i32> [#uses=1]
+ %s4 = mul i32 %s3, %E ; <i32> [#uses=1]
+ %s5 = mul i32 %s4, %F ; <i32> [#uses=1]
+ %s6 = mul i32 %s5, %G ; <i32> [#uses=1]
+ %s7 = mul i32 %s6, %H ; <i32> [#uses=1]
+ %s8 = mul i32 %s7, %I ; <i32> [#uses=1]
+ %s9 = mul i32 %s8, %J ; <i32> [#uses=1]
+ ret i32 %s9
+}
+
+define i32 @regpressure3(i16* %P, i1 %Cond, i32* %Other) { ; sum of ten sign-extended i16 loads P[0]..P[9]; %Cond and %Other are not referenced in the body
+ %A = load i16* %P ; <i16> [#uses=1]
+ %Bp = getelementptr i16* %P, i32 1 ; <i16*> [#uses=1]
+ %B = load i16* %Bp ; <i16> [#uses=1]
+ %Cp = getelementptr i16* %P, i32 2 ; <i16*> [#uses=1]
+ %C = load i16* %Cp ; <i16> [#uses=1]
+ %Dp = getelementptr i16* %P, i32 3 ; <i16*> [#uses=1]
+ %D = load i16* %Dp ; <i16> [#uses=1]
+ %Ep = getelementptr i16* %P, i32 4 ; <i16*> [#uses=1]
+ %E = load i16* %Ep ; <i16> [#uses=1]
+ %Fp = getelementptr i16* %P, i32 5 ; <i16*> [#uses=1]
+ %F = load i16* %Fp ; <i16> [#uses=1]
+ %Gp = getelementptr i16* %P, i32 6 ; <i16*> [#uses=1]
+ %G = load i16* %Gp ; <i16> [#uses=1]
+ %Hp = getelementptr i16* %P, i32 7 ; <i16*> [#uses=1]
+ %H = load i16* %Hp ; <i16> [#uses=1]
+ %Ip = getelementptr i16* %P, i32 8 ; <i16*> [#uses=1]
+ %I = load i16* %Ip ; <i16> [#uses=1]
+ %Jp = getelementptr i16* %P, i32 9 ; <i16*> [#uses=1]
+ %J = load i16* %Jp ; <i16> [#uses=1]
+ %A.upgrd.1 = sext i16 %A to i32 ; <i32> [#uses=1] -- every element is widened to i32 before the adds
+ %B.upgrd.2 = sext i16 %B to i32 ; <i32> [#uses=1]
+ %D.upgrd.3 = sext i16 %D to i32 ; <i32> [#uses=1]
+ %C.upgrd.4 = sext i16 %C to i32 ; <i32> [#uses=1]
+ %E.upgrd.5 = sext i16 %E to i32 ; <i32> [#uses=1]
+ %F.upgrd.6 = sext i16 %F to i32 ; <i32> [#uses=1]
+ %G.upgrd.7 = sext i16 %G to i32 ; <i32> [#uses=1]
+ %H.upgrd.8 = sext i16 %H to i32 ; <i32> [#uses=1]
+ %I.upgrd.9 = sext i16 %I to i32 ; <i32> [#uses=1]
+ %J.upgrd.10 = sext i16 %J to i32 ; <i32> [#uses=1]
+ %s1 = add i32 %A.upgrd.1, %B.upgrd.2 ; <i32> [#uses=1] -- start of the running sum
+ %s2 = add i32 %C.upgrd.4, %s1 ; <i32> [#uses=1]
+ %s3 = add i32 %D.upgrd.3, %s2 ; <i32> [#uses=1]
+ %s4 = add i32 %E.upgrd.5, %s3 ; <i32> [#uses=1]
+ %s5 = add i32 %F.upgrd.6, %s4 ; <i32> [#uses=1]
+ %s6 = add i32 %G.upgrd.7, %s5 ; <i32> [#uses=1]
+ %s7 = add i32 %H.upgrd.8, %s6 ; <i32> [#uses=1]
+ %s8 = add i32 %I.upgrd.9, %s7 ; <i32> [#uses=1]
+ %s9 = add i32 %J.upgrd.10, %s8 ; <i32> [#uses=1]
+ ret i32 %s9
+}
diff --git a/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll b/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll
new file mode 100644
index 0000000000..af6d47af7a
--- /dev/null
+++ b/test/CodeGen/X86/Stats/twoaddr-coalesce-2.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -stats 2>&1 | \
+; RUN: grep "twoaddrinstr" | grep "Number of instructions aggressively commuted"
+; rdar://6480363
+
+target triple = "i386-apple-darwin9.6"
+
+define <2 x double> @t(<2 x double> %A, <2 x double> %B, <2 x double> %C) nounwind readnone { ; ORs a constant into the bits of %B, then adds %A and %C; the test greps -stats for the twoaddrinstr "aggressively commuted" counter
+entry:
+ %tmp.i3 = bitcast <2 x double> %B to <2 x i64> ; <<2 x i64>> [#uses=1]
+ %tmp2.i = or <2 x i64> %tmp.i3, <i64 4607632778762754458, i64 4607632778762754458> ; <<2 x i64>> [#uses=1] -- same constant ORed into both lanes
+ %tmp3.i = bitcast <2 x i64> %tmp2.i to <2 x double> ; <<2 x double>> [#uses=1]
+ %tmp.i2 = fadd <2 x double> %tmp3.i, %A ; <<2 x double>> [#uses=1]
+ %tmp.i = fadd <2 x double> %tmp.i2, %C ; <<2 x double>> [#uses=1]
+ ret <2 x double> %tmp.i
+}
diff --git a/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll b/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll
new file mode 100644
index 0000000000..513c304e3b
--- /dev/null
+++ b/test/CodeGen/X86/Stats/twoaddr-pass-sink.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -stats 2>&1 | grep "Number of 3-address instructions sunk"
+
+define void @t2(<2 x i64>* %vDct, <2 x i64>* %vYp, i8* %skiplist, <2 x i64> %a1) nounwind { ; loop that interleaves two vectors from %vDct, multiplies by %a1 and stores to %vYp until a zero byte is read from %skiplist
+entry:
+ %tmp25 = bitcast <2 x i64> %a1 to <8 x i16> ; <<8 x i16>> [#uses=1] -- computed once in %entry, used by the multiply inside the loop
+ br label %bb
+bb: ; preds = %bb, %entry
+ %skiplist_addr.0.rec = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
+ %vYp_addr.0.rec = shl i32 %skiplist_addr.0.rec, 3 ; <i32> [#uses=3] -- vector index is 8 * loop counter
+ %vDct_addr.0 = getelementptr <2 x i64>* %vDct, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1]
+ %vYp_addr.0 = getelementptr <2 x i64>* %vYp, i32 %vYp_addr.0.rec ; <<2 x i64>*> [#uses=1]
+ %skiplist_addr.0 = getelementptr i8* %skiplist, i32 %skiplist_addr.0.rec ; <i8*> [#uses=1]
+ %vDct_addr.0.sum43 = or i32 %vYp_addr.0.rec, 1 ; <i32> [#uses=1] -- low bits are zero after the shift, so this selects the next vector
+ %tmp7 = getelementptr <2 x i64>* %vDct, i32 %vDct_addr.0.sum43 ; <<2 x i64>*> [#uses=1]
+ %tmp8 = load <2 x i64>* %tmp7, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp11 = load <2 x i64>* %vDct_addr.0, align 16 ; <<2 x i64>> [#uses=1]
+ %tmp13 = bitcast <2 x i64> %tmp8 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp15 = bitcast <2 x i64> %tmp11 to <8 x i16> ; <<8 x i16>> [#uses=1]
+ %tmp16 = shufflevector <8 x i16> %tmp15, <8 x i16> %tmp13, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1] -- interleaves elements 0..3 of the two inputs
+ %tmp26 = mul <8 x i16> %tmp25, %tmp16 ; <<8 x i16>> [#uses=1]
+ %tmp27 = bitcast <8 x i16> %tmp26 to <2 x i64> ; <<2 x i64>> [#uses=1]
+ store <2 x i64> %tmp27, <2 x i64>* %vYp_addr.0, align 16
+ %tmp37 = load i8* %skiplist_addr.0, align 1 ; <i8> [#uses=1]
+ %tmp38 = icmp eq i8 %tmp37, 0 ; <i1> [#uses=1] -- a zero skiplist byte ends the loop
+ %indvar.next = add i32 %skiplist_addr.0.rec, 1 ; <i32> [#uses=1]
+ br i1 %tmp38, label %return, label %bb
+return: ; preds = %bb
+ ret void
+}
diff --git a/test/CodeGen/X86/Stats/vec_insert-6.ll b/test/CodeGen/X86/Stats/vec_insert-6.ll
new file mode 100644
index 0000000000..2a4864a48a
--- /dev/null
+++ b/test/CodeGen/X86/Stats/vec_insert-6.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn | grep pslldq
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mcpu=penryn -mtriple=i686-apple-darwin9 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 6
+
+define <4 x float> @t3(<4 x float>* %P) nounwind { ; shuffles a loaded vector with a zero vector; the test's first command greps the output for pslldq
+ %tmp1 = load <4 x float>* %P
+ %tmp2 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp1, <4 x i32> < i32 4, i32 4, i32 4, i32 0 > ; lanes 0-2 take element 0 of %tmp1 (mask index 4); lane 3 takes element 0 of the zero vector
+ ret <4 x float> %tmp2
+}
diff --git a/test/CodeGen/X86/Stats/vec_shuffle-19.ll b/test/CodeGen/X86/Stats/vec_shuffle-19.ll
new file mode 100644
index 0000000000..b26f920e5e
--- /dev/null
+++ b/test/CodeGen/X86/Stats/vec_shuffle-19.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=penryn -mattr=+sse2 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 4
+; PR2485
+
+define <4 x i32> @t(<4 x i32> %a, <4 x i32> %b) nounwind { ; single two-input shuffle; the test greps the asm-printer statistic for 4
+entry:
+ %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> < i32 4, i32 0, i32 0, i32 0 > ; <<4 x i32>> [#uses=1] -- lane 0 is element 0 of %b, lanes 1-3 are element 0 of %a
+ ret <4 x i32> %shuffle
+}
diff --git a/test/CodeGen/X86/Stats/vec_shuffle-20.ll b/test/CodeGen/X86/Stats/vec_shuffle-20.ll
new file mode 100644
index 0000000000..b6b8ba6f84
--- /dev/null
+++ b/test/CodeGen/X86/Stats/vec_shuffle-20.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -o /dev/null -march=x86 -mcpu=corei7 -mtriple=i686-apple-darwin9 -stats -info-output-file - | grep asm-printer | grep 2
+
+define <4 x float> @func(<4 x float> %fp0, <4 x float> %fp1) nounwind { ; single two-input shuffle; the test greps the asm-printer statistic for 2
+entry:
+ shufflevector <4 x float> %fp0, <4 x float> %fp1, <4 x i32> < i32 0, i32 1, i32 2, i32 7 > ; <<4 x float>>:0 [#uses=1] -- lanes 0-2 from %fp0, lane 3 is element 3 of %fp1; unnamed result is %0
+ ret <4 x float> %0
+}
diff --git a/test/CodeGen/X86/Stats/zero-remat.ll b/test/CodeGen/X86/Stats/zero-remat.ll
new file mode 100644
index 0000000000..4242530f77
--- /dev/null
+++ b/test/CodeGen/X86/Stats/zero-remat.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -march=x86-64 -o /dev/null -stats -info-output-file - | grep asm-printer | grep 12
+; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32
+
+declare void @bar(double %x)
+declare void @barf(float %x)
+
+define double @foo() nounwind { ; uses the double constant 0.0 twice: as the argument to @bar and as the return value
+
+ call void @bar(double 0.0) ; first use of the zero constant, before the call
+ ret double 0.0 ; second use of the zero constant, after the call
+
+;CHECK-32: foo:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foo:
+;CHECK-64: xorps
+;CHECK-64: call
+;CHECK-64: xorps
+;CHECK-64: ret
+}
+
+
+define float @foof() nounwind { ; float counterpart of @foo: 0.0 is passed to @barf and then returned
+ call void @barf(float 0.0) ; first use of the zero constant, before the call
+ ret float 0.0 ; second use of the zero constant, after the call
+
+;CHECK-32: foof:
+;CHECK-32: call
+;CHECK-32: fldz
+;CHECK-32: ret
+
+;CHECK-64: foof:
+;CHECK-64: xorps
+;CHECK-64: call
+;CHECK-64: xorps
+;CHECK-64: ret
+}