diff options
Diffstat (limited to 'test/CodeGen/X86')
109 files changed, 2471 insertions, 256 deletions
diff --git a/test/CodeGen/X86/2004-03-30-Select-Max.ll b/test/CodeGen/X86/2004-03-30-Select-Max.ll index 526b0b206a..e22aa6a093 100644 --- a/test/CodeGen/X86/2004-03-30-Select-Max.ll +++ b/test/CodeGen/X86/2004-03-30-Select-Max.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | not grep "j[lgbe]" +; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; CHECK-NOT: {{j[lgbe]}} define i32 @max(i32 %A, i32 %B) nounwind { %gt = icmp sgt i32 %A, %B ; <i1> [#uses=1] diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll index 6ec9a48849..a58c9b102d 100644 --- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll +++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll @@ -52,8 +52,8 @@ entry: %tmp21 = load double* %tmp20 ; <double> [#uses=1] %tmp.upgrd.6 = getelementptr [9 x i8]* @str, i32 0, i64 0 ; <i8*> [#uses=1] %tmp.upgrd.7 = call i32 (i8*, ...)* @printf( i8* %tmp.upgrd.6, double %tmp21, double %tmp19 ) ; <i32> [#uses=0] - br label %return -return: ; preds = %entry + br label %finish +finish: %retval.upgrd.8 = load i32* %retval ; <i32> [#uses=1] ret i32 %retval.upgrd.8 } diff --git a/test/CodeGen/X86/2006-11-17-IllegalMove.ll b/test/CodeGen/X86/2006-11-17-IllegalMove.ll index adc825c039..783d9f94ca 100644 --- a/test/CodeGen/X86/2006-11-17-IllegalMove.ll +++ b/test/CodeGen/X86/2006-11-17-IllegalMove.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86-64 > %t -; RUN: grep movb %t | count 2 +; RUN: grep movb %t | count 1 ; RUN: grep "movzb[wl]" %t diff --git a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll index 88186cd6fa..e81534b011 100644 --- a/test/CodeGen/X86/2007-09-05-InvalidAsm.ll +++ b/test/CodeGen/X86/2007-09-05-InvalidAsm.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | not grep "lea[[:space:]]R" +; RUN: llc < %s -mtriple=x86_64-apple-darwin -x86-asm-syntax=intel | FileCheck %s +; CHECK-NOT: lea R %struct.AGenericCall = type { %struct.AGenericManager*, 
%struct.ComponentParameters*, i32* } %struct.AGenericManager = type <{ i8 }> diff --git a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll index 266fd7b913..39af9319c8 100644 --- a/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll +++ b/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll @@ -10,10 +10,10 @@ %struct.indexentry = type { i32, i8*, i8*, i8*, i8*, i8* } -define i32 @_bfd_stab_section_find_nearest_line(i32 %offset) nounwind { +define i32 @_bfd_stab_section_find_nearest_line(i32 %offset, i1 %cond) nounwind { entry: %tmp910 = add i32 0, %offset ; <i32> [#uses=1] - br i1 true, label %bb951, label %bb917 + br i1 %cond, label %bb951, label %bb917 bb917: ; preds = %entry ret i32 0 @@ -21,7 +21,7 @@ bb917: ; preds = %entry bb951: ; preds = %bb986, %entry %tmp955 = sdiv i32 0, 2 ; <i32> [#uses=3] %tmp961 = getelementptr %struct.indexentry* null, i32 %tmp955, i32 0 ; <i32*> [#uses=1] - br i1 true, label %bb986, label %bb967 + br i1 %cond, label %bb986, label %bb967 bb967: ; preds = %bb951 ret i32 0 diff --git a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll index 66f06778bd..b2cf34cd20 100644 --- a/test/CodeGen/X86/2008-10-27-CoalescerBug.ll +++ b/test/CodeGen/X86/2008-10-27-CoalescerBug.ll @@ -17,8 +17,7 @@ bb: ; preds = %bb, %entry ; CHECK: %bb30.loopexit ; CHECK: divsd %xmm0 ; CHECK: movsd %xmm0, 16(%esp) -; CHECK: .align -; CHECK-NEXT: %bb3 +; CHECK: %bb3 bb3: ; preds = %bb30.loopexit, %bb25, %bb3 %2 = load i32* null, align 4 ; <i32> [#uses=1] %3 = mul i32 %2, 0 ; <i32> [#uses=1] diff --git a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 1b2f20303b..d50fe6f73a 100644 --- a/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "8 machine-licm" +; RUN: llc < %s -march=x86-64 
-mattr=+sse3,+sse41 -mcpu=penryn -stats 2>&1 | grep "5 machine-licm" ; RUN: llc < %s -march=x86-64 -mattr=+sse3,+sse41 -mcpu=penryn | FileCheck %s ; rdar://6627786 ; rdar://7792037 diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll index 9f5a8c53be..5cb05e8a79 100644 --- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll +++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \ -; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \ +; RUN: -mcpu=generic -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=basic < %s | \ ; RUN: FileCheck %s ; rdar://6808032 diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll index c6f4b497af..be10ad5cc2 100644 --- a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll +++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll @@ -12,9 +12,9 @@ declare hidden fastcc void @_ZN3JSCL23returnToThrowTrampolineEPNS_12JSGlobalData ; Avoid hoisting the test above loads or copies ; CHECK: %entry -; CHECK: cmpq +; CHECK: test ; CHECK-NOT: mov -; CHECK: jb +; CHECK: je define i32 @cti_op_eq(i8** nocapture %args) nounwind ssp { entry: %0 = load i8** null, align 8 diff --git a/test/CodeGen/X86/2011-06-03-x87chain.ll b/test/CodeGen/X86/2011-06-03-x87chain.ll index bf7f583aab..ce63c74fbd 100644 --- a/test/CodeGen/X86/2011-06-03-x87chain.ll +++ b/test/CodeGen/X86/2011-06-03-x87chain.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mattr=+sse | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -mattr=+sse | FileCheck %s define float @chainfail1(i64* nocapture %a, i64* nocapture %b, i32 %x, i32 %y, float* nocapture %f) nounwind uwtable noinline ssp { entry: diff --git a/test/CodeGen/X86/2011-08-29-InitOrder.ll b/test/CodeGen/X86/2011-08-29-InitOrder.ll index 4d5f8d7857..a95dcb5807 100644 --- 
a/test/CodeGen/X86/2011-08-29-InitOrder.ll +++ b/test/CodeGen/X86/2011-08-29-InitOrder.ll @@ -3,7 +3,7 @@ ; PR5329 @llvm.global_ctors = appending global [3 x { i32, void ()* }] [{ i32, void ()* } { i32 2000, void ()* @construct_2 }, { i32, void ()* } { i32 3000, void ()* @construct_3 }, { i32, void ()* } { i32 1000, void ()* @construct_1 }] -; CHECK-DEFAULT .section .ctors.64535,"aw",@progbits +; CHECK-DEFAULT: .section .ctors.64535,"aw",@progbits ; CHECK-DEFAULT: .long construct_1 ; CHECK-DEFAULT: .section .ctors.63535,"aw",@progbits ; CHECK-DEFAULT: .long construct_2 diff --git a/test/CodeGen/X86/2012-05-19-avx2-store.ll b/test/CodeGen/X86/2012-05-19-avx2-store.ll index 61fef90139..1c1e8e2f0a 100644 --- a/test/CodeGen/X86/2012-05-19-avx2-store.ll +++ b/test/CodeGen/X86/2012-05-19-avx2-store.ll @@ -3,8 +3,7 @@ define void @double_save(<4 x i32>* %Ap, <4 x i32>* %Bp, <8 x i32>* %P) nounwind ssp { entry: ; CHECK: vmovaps - ; CHECK: vmovaps - ; CHECK: vinsertf128 + ; CHECK: vinsertf128 $1, ([[A0:%rdi|%rsi]]), ; CHECK: vmovups %A = load <4 x i32>* %Ap %B = load <4 x i32>* %Bp diff --git a/test/CodeGen/X86/2012-07-10-extload64.ll b/test/CodeGen/X86/2012-07-10-extload64.ll new file mode 100644 index 0000000000..906b748fa4 --- /dev/null +++ b/test/CodeGen/X86/2012-07-10-extload64.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mtriple=i686-pc-win32 | FileCheck %s + +; CHECK: load_store +define void @load_store(<4 x i16>* %in) { +entry: +; CHECK: movsd + %A27 = load <4 x i16>* %in, align 4 + %A28 = add <4 x i16> %A27, %A27 +; CHECK: movlpd + store <4 x i16> %A28, <4 x i16>* %in, align 4 + ret void +; CHECK: ret +} + +; Make sure that we store a 64bit value, even on 32bit systems. 
+;CHECK: store_64 +define void @store_64(<2 x i32>* %ptr) { +BB: + store <2 x i32> zeroinitializer, <2 x i32>* %ptr + ret void +;CHECK: movlpd +;CHECK: ret +} + +;CHECK: load_64 +define <2 x i32> @load_64(<2 x i32>* %ptr) { +BB: + %t = load <2 x i32>* %ptr + ret <2 x i32> %t +;CHECK: movsd +;CHECK: ret +} diff --git a/test/CodeGen/X86/2012-07-10-shufnorm.ll b/test/CodeGen/X86/2012-07-10-shufnorm.ll new file mode 100644 index 0000000000..e39df58877 --- /dev/null +++ b/test/CodeGen/X86/2012-07-10-shufnorm.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx | FileCheck %s + +; CHECK: ocl +define void @ocl() { +entry: + %vext = shufflevector <2 x double> zeroinitializer, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit = shufflevector <8 x double> %vext, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> + %vecinit1 = insertelement <8 x double> %vecinit, double undef, i32 2 + %vecinit3 = insertelement <8 x double> %vecinit1, double undef, i32 3 + %vecinit5 = insertelement <8 x double> %vecinit3, double 0.000000e+00, i32 4 + %vecinit9 = shufflevector <8 x double> %vecinit5, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 8, i32 9, i32 10> + store <8 x double> %vecinit9, <8 x double>* undef + ret void +; CHECK: vxorps +; CHECK: ret +} + diff --git a/test/CodeGen/X86/2012-07-15-broadcastfold.ll b/test/CodeGen/X86/2012-07-15-broadcastfold.ll new file mode 100644 index 0000000000..3b7a8a7b87 --- /dev/null +++ b/test/CodeGen/X86/2012-07-15-broadcastfold.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx2 | FileCheck %s + +declare x86_fastcallcc i64 @barrier() + +;CHECK: bcast_fold +;CHECK: vmovaps %xmm{{[0-9]+}}, [[SPILLED:[^\)]+\)]] +;CHECK: barrier +;CHECK: vbroadcastss [[SPILLED]], %ymm0 +;CHECK: ret +define <8 x float> @bcast_fold( float* %A) { +BB: + %A0 = load float* %A + 
%tt3 = call x86_fastcallcc i64 @barrier() + br i1 undef, label %work, label %exit + +work: + %A1 = insertelement <8 x float> undef, float %A0, i32 0 + %A2 = shufflevector <8 x float> %A1, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> + ret <8 x float> %A2 + +exit: + ret <8 x float> undef +} diff --git a/test/CodeGen/X86/2012-07-15-tconst_shl.ll b/test/CodeGen/X86/2012-07-15-tconst_shl.ll new file mode 100644 index 0000000000..46eca7644e --- /dev/null +++ b/test/CodeGen/X86/2012-07-15-tconst_shl.ll @@ -0,0 +1,9 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 -mattr=+avx2 +; make sure that we are not crashing. + +define <16 x i32> @autogen_SD34717() { +BB: + %Shuff7 = shufflevector <16 x i32> zeroinitializer, <16 x i32> zeroinitializer, <16 x i32> <i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 undef, i32 22, i32 24, i32 26, i32 28, i32 30, i32 undef> + %B9 = lshr <16 x i32> zeroinitializer, %Shuff7 + ret <16 x i32> %B9 +} diff --git a/test/CodeGen/X86/2012-07-15-vshl.ll b/test/CodeGen/X86/2012-07-15-vshl.ll new file mode 100644 index 0000000000..cd0fef469e --- /dev/null +++ b/test/CodeGen/X86/2012-07-15-vshl.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7 -mattr=+avx +; PR13352 + +declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone + +define void @f_f() nounwind { +allocas: + br label %for_loop29 + +for_loop29: ; preds = %safe_if_after_true, %allocas + %indvars.iv596 = phi i64 [ %indvars.iv.next597, %safe_if_after_true ], [ 0, %allocas ] + %0 = trunc i64 %indvars.iv596 to i32 + %smear.15 = insertelement <16 x i32> undef, i32 %0, i32 15 + %bitop = lshr <16 x i32> zeroinitializer, %smear.15 + %bitop35 = and <16 x i32> %bitop, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> + %bitop35_to_bool = icmp ne <16 x i32> %bitop35, zeroinitializer + %val_to_boolvec32 = sext <16 x i1> 
%bitop35_to_bool to <16 x i32> + %floatmask.i526 = bitcast <16 x i32> %val_to_boolvec32 to <16 x float> + %mask1.i529 = shufflevector <16 x float> %floatmask.i526, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> + %"internal_mask&function_mask41_any" = icmp eq i32 undef, 0 + br i1 %"internal_mask&function_mask41_any", label %safe_if_after_true, label %safe_if_run_true + +safe_if_after_true: ; preds = %for_loop29 + %indvars.iv.next597 = add i64 %indvars.iv596, 1 + br label %for_loop29 + +safe_if_run_true: ; preds = %for_loop29 + %blend1.i583 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> undef, <8 x float> undef, <8 x float> %mask1.i529) nounwind + unreachable +} + diff --git a/test/CodeGen/X86/2012-07-16-LeaUndef.ll b/test/CodeGen/X86/2012-07-16-LeaUndef.ll new file mode 100644 index 0000000000..9e5cbd2f33 --- /dev/null +++ b/test/CodeGen/X86/2012-07-16-LeaUndef.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 + +define void @autogen_SD2543() { +A: + %E83 = add i32 0, 1 + %E820 = add i32 0, undef + br label %C +C: + %B908 = add i32 %E83, %E820 + store i32 %B908, i32* undef + %Sl2391 = select i1 undef, i32 undef, i32 %E83 + %Cmp3114 = icmp ne i32 %Sl2391, undef + br i1 %Cmp3114, label %C, label %G +G: + ret void +} diff --git a/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll b/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll new file mode 100644 index 0000000000..17533a1e16 --- /dev/null +++ b/test/CodeGen/X86/2012-07-16-fp2ui-i1.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 + +define void @autogen_SD3100() { +BB: + %FC123 = fptoui float 0x40693F5D00000000 to i1 + br i1 %FC123, label %V, label %W + +V: + ret void +W: + ret void +} diff --git a/test/CodeGen/X86/2012-07-17-vtrunc.ll b/test/CodeGen/X86/2012-07-17-vtrunc.ll new file mode 100644 index 0000000000..2de2f97d7d --- /dev/null +++ b/test/CodeGen/X86/2012-07-17-vtrunc.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 + 
+define void @autogen_SD33189483() { +BB: + br label %CF76 + +CF76: ; preds = %CF76, %BB + %Shuff13 = shufflevector <4 x i64> zeroinitializer, <4 x i64> undef, <4 x i32> zeroinitializer + %Tr16 = trunc <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> to <8 x i1> + %E19 = extractelement <8 x i1> %Tr16, i32 2 + br i1 %E19, label %CF76, label %CF78 + +CF78: ; preds = %CF78, %CF76 + %BC = bitcast <4 x i64> %Shuff13 to <4 x double> + br label %CF78 +} diff --git a/test/CodeGen/X86/2012-07-23-select_cc.ll b/test/CodeGen/X86/2012-07-23-select_cc.ll new file mode 100644 index 0000000000..33fcb120e1 --- /dev/null +++ b/test/CodeGen/X86/2012-07-23-select_cc.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s -march=x86-64 -mcpu=corei7 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; PR 13428 + +declare void @use(double) + +define void @test() { +entry: + call void @use(double 1.000000e+00) + %A = icmp eq i64 undef, 2 + %B = zext i1 %A to i32 + %C = sitofp i32 %B to double + call void @use(double %C) + call void @use(double 0.000000e+00) + unreachable +} diff --git a/test/CodeGen/X86/2012-08-07-CmpISelBug.ll b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll new file mode 100644 index 0000000000..000b853ab8 --- /dev/null +++ b/test/CodeGen/X86/2012-08-07-CmpISelBug.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +; Cmp lowering should not look past the truncate unless the high bits are known +; zero. 
+; rdar://12027825 + +define void @foo(i8 %arg4, i32 %arg5, i32* %arg14) nounwind { +bb: +; CHECK: foo: +; CHECK-NOT: testl +; CHECK: testb + %tmp48 = zext i8 %arg4 to i32 + %tmp49 = and i32 %tmp48, 32 + %tmp50 = add i32 %tmp49, 1593371643 + %tmp55 = sub i32 %tmp50, 0 + %tmp56 = add i32 %tmp55, 7787538 + %tmp57 = xor i32 %tmp56, 1601159181 + %tmp58 = xor i32 %arg5, 1601159181 + %tmp59 = and i32 %tmp57, %tmp58 + %tmp60 = add i32 %tmp59, -1263900958 + %tmp67 = sub i32 %tmp60, 0 + %tmp103 = xor i32 %tmp56, 13 + %tmp104 = trunc i32 %tmp103 to i8 + %tmp105 = sub i8 0, %tmp104 + %tmp106 = add i8 %tmp105, -103 + %tmp113 = sub i8 %tmp106, 0 + %tmp114 = add i8 %tmp113, -72 + %tmp141 = icmp ne i32 %tmp67, -1263900958 + %tmp142 = select i1 %tmp141, i8 %tmp114, i8 undef + %tmp143 = xor i8 %tmp142, 81 + %tmp144 = zext i8 %tmp143 to i32 + %tmp145 = add i32 %tmp144, 2062143348 + %tmp152 = sub i32 %tmp145, 0 + store i32 %tmp152, i32* %arg14 + ret void +} diff --git a/test/CodeGen/X86/2012-08-16-setcc.ll b/test/CodeGen/X86/2012-08-16-setcc.ll new file mode 100644 index 0000000000..ed511567c3 --- /dev/null +++ b/test/CodeGen/X86/2012-08-16-setcc.ll @@ -0,0 +1,45 @@ +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s + +; rdar://12081007 + +; CHECK: and_1: +; CHECK: andb +; CHECK-NEXT: cmovnel +; CHECK: ret +define i32 @and_1(i8 zeroext %a, i8 zeroext %b, i32 %x) { + %1 = and i8 %b, %a + %2 = icmp ne i8 %1, 0 + %3 = select i1 %2, i32 %x, i32 0 + ret i32 %3 +} + +; CHECK: and_2: +; CHECK: andb +; CHECK-NEXT: setne +; CHECK: ret +define zeroext i1 @and_2(i8 zeroext %a, i8 zeroext %b) { + %1 = and i8 %b, %a + %2 = icmp ne i8 %1, 0 + ret i1 %2 +} + +; CHECK: xor_1: +; CHECK: xorb +; CHECK-NEXT: cmovnel +; CHECK: ret +define i32 @xor_1(i8 zeroext %a, i8 zeroext %b, i32 %x) { + %1 = xor i8 %b, %a + %2 = icmp ne i8 %1, 0 + %3 = select i1 %2, i32 %x, i32 0 + ret i32 %3 +} + +; CHECK: xor_2: +; CHECK: xorb +; CHECK-NEXT: setne +; CHECK: ret +define zeroext i1 @xor_2(i8 zeroext %a, i8 
zeroext %b) { + %1 = xor i8 %b, %a + %2 = icmp ne i8 %1, 0 + ret i1 %2 +} diff --git a/test/CodeGen/X86/2012-08-17-legalizer-crash.ll b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll new file mode 100644 index 0000000000..a65e688154 --- /dev/null +++ b/test/CodeGen/X86/2012-08-17-legalizer-crash.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s | FileCheck %s +; Check that an overly large immediate created by SROA doesn't crash the +; legalizer. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct._GtkSheetRow = type { i32*, i32, i32, i32, %struct._GtkSheetButton, i32, i32 } +%struct._GtkSheetButton = type { i32, i32*, i32, i32*, i32 } + +@a = common global %struct._GtkSheetRow* null, align 8 + +define void @fn1() nounwind uwtable ssp { +entry: + %0 = load %struct._GtkSheetRow** @a, align 8 + %1 = bitcast %struct._GtkSheetRow* %0 to i576* + %srcval2 = load i576* %1, align 8 + %tobool = icmp ugt i576 %srcval2, 57586096570152913699974892898380567793532123114264532903689671329431521032595044740083720782129802971518987656109067457577065805510327036019308994315074097345724415 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + store i576 %srcval2, i576* %1, align 8 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void + +; CHECK: fn1: +; CHECK: shrq $32, [[REG:%.*]] +; CHECK: testq [[REG]], [[REG]] +; CHECK: je +} diff --git a/test/CodeGen/X86/alignment-2.ll b/test/CodeGen/X86/alignment-2.ll index cc709b52d9..1f9e85cbb7 100644 --- a/test/CodeGen/X86/alignment-2.ll +++ b/test/CodeGen/X86/alignment-2.ll @@ -18,7 +18,9 @@ define signext i8 @do_lo_list() nounwind optsize ssp { bb: ; CHECK: do_lo_list -; CHECK-NOT: movaps +; Make sure we do not use movaps for the global variable. +; It is okay to use movaps for writing the local variable on stack. 
+; CHECK-NOT: movaps {{[0-9]*}}(%{{[a-z]*}}), {{%xmm[0-9]}} %myopt = alloca %struct.printQueryOpt, align 4 %tmp = bitcast %struct.printQueryOpt* %myopt to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false) diff --git a/test/CodeGen/X86/alloca-align-rounding-32.ll b/test/CodeGen/X86/alloca-align-rounding-32.ll index 8a8b044d14..a45284e10c 100644 --- a/test/CodeGen/X86/alloca-align-rounding-32.ll +++ b/test/CodeGen/X86/alloca-align-rounding-32.ll @@ -15,5 +15,6 @@ define void @foo2(i32 %h) { call void @bar(<2 x i64>* %p) ret void ; CHECK: foo2 +; CHECK: andl $-32, %esp ; CHECK: andl $-32, %eax } diff --git a/test/CodeGen/X86/alloca-align-rounding.ll b/test/CodeGen/X86/alloca-align-rounding.ll index 7bc880625c..3d76fb0aa2 100644 --- a/test/CodeGen/X86/alloca-align-rounding.ll +++ b/test/CodeGen/X86/alloca-align-rounding.ll @@ -15,5 +15,6 @@ define void @foo2(i64 %h) { call void @bar(<2 x i64>* %p) ret void ; CHECK: foo2 +; CHECK: andq $-32, %rsp ; CHECK: andq $-32, %rax } diff --git a/test/CodeGen/X86/atom-lea-sp.ll b/test/CodeGen/X86/atom-lea-sp.ll index 59427880a7..19482e13d8 100644 --- a/test/CodeGen/X86/atom-lea-sp.ll +++ b/test/CodeGen/X86/atom-lea-sp.ll @@ -1,15 +1,15 @@ -; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=atom %s +; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck -check-prefix=ATOM %s ; RUN: llc < %s -mcpu=core2 -mtriple=i686-linux | FileCheck %s declare void @use_arr(i8*) declare void @many_params(i32, i32, i32, i32, i32, i32) define void @test1() nounwind { -; atom: test1: -; atom: leal -1052(%esp), %esp -; atom-NOT: sub -; atom: call -; atom: leal 1052(%esp), %esp +; ATOM: test1: +; ATOM: leal -1052(%esp), %esp +; ATOM-NOT: sub +; ATOM: call +; ATOM: leal 1052(%esp), %esp ; CHECK: test1: ; CHECK: subl @@ -22,10 +22,10 @@ define void @test1() nounwind { } define void @test2() 
nounwind { -; atom: test2: -; atom: leal -28(%esp), %esp -; atom: call -; atom: leal 28(%esp), %esp +; ATOM: test2: +; ATOM: leal -28(%esp), %esp +; ATOM: call +; ATOM: leal 28(%esp), %esp ; CHECK: test2: ; CHECK-NOT: lea @@ -34,9 +34,9 @@ define void @test2() nounwind { } define void @test3() nounwind { -; atom: test3: -; atom: leal -8(%esp), %esp -; atom: leal 8(%esp), %esp +; ATOM: test3: +; ATOM: leal -8(%esp), %esp +; ATOM: leal 8(%esp), %esp ; CHECK: test3: ; CHECK-NOT: lea diff --git a/test/CodeGen/X86/atomic-pointer.ll b/test/CodeGen/X86/atomic-pointer.ll new file mode 100644 index 0000000000..a455277be4 --- /dev/null +++ b/test/CodeGen/X86/atomic-pointer.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=i686-none-linux | FileCheck %s + +define i32* @test_atomic_ptr_load(i32** %a0) { +; CHECK: test_atomic_ptr_load +; CHECK: movl +; CHECK: movl +; CHECK: ret +0: + %0 = load atomic i32** %a0 seq_cst, align 4 + ret i32* %0 +} + +define void @test_atomic_ptr_store(i32* %a0, i32** %a1) { +; CHECK: test_atomic_ptr_store +; CHECK: movl +; CHECK: movl +; CHECK: xchgl +; CHECK: ret +0: + store atomic i32* %a0, i32** %a1 seq_cst, align 4 + ret void +} diff --git a/test/CodeGen/X86/avx-intrinsics-x86.ll b/test/CodeGen/X86/avx-intrinsics-x86.ll index a8fd8e3956..c44beb4bc2 100644 --- a/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1154,7 +1154,7 @@ define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: seta %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1165,7 +1165,7 @@ define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ 
-1176,7 +1176,7 @@ define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: seto %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1187,7 +1187,7 @@ define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: sets %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1198,7 +1198,7 @@ define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestri - ; CHECK: movl + ; CHECK: sete %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1209,6 +1209,7 @@ define <16 x i8> @test_x86_sse42_pcmpestrm128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK: movl ; CHECK: movl ; CHECK: vpcmpestrm + ; CHECK-NOT: vmov %res = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -1226,7 +1227,7 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: seta %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1235,7 +1236,7 @@ declare i32 @llvm.x86.sse42.pcmpistria128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistric128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: sbbl %res = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1244,7 +1245,7 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) 
nounwind rea define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: seto %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1253,7 +1254,7 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: sets %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1262,7 +1263,7 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistri - ; CHECK: movl + ; CHECK: sete %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <i32> [#uses=1] ret i32 %res } @@ -1271,6 +1272,7 @@ declare i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8>, <16 x i8>, i8) nounwind rea define <16 x i8> @test_x86_sse42_pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK: vpcmpistrm + ; CHECK-NOT: vmov %res = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index 459dbb235a..a6141b0956 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1136,3 +1136,22 @@ define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, } declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly + +; PR13298 +define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, + <8 x i32> %idx, <8 x float> %mask, + float* nocapture %out) { +; CHECK: test_gather_mask +; CHECK: vmovdqa %ymm2, [[DEST:%.*]] +; CHECK: vgatherdps [[DEST]] +;; gather with mask + %a_i8 = bitcast 
float* %a to i8* + %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, + i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ; + +;; for debugging, we'll just dump out the mask + %out_ptr = bitcast float * %out to <8 x float> * + store <8 x float> %mask, <8 x float> * %out_ptr, align 4 + + ret <8 x float> %res +} diff --git a/test/CodeGen/X86/block-placement.ll b/test/CodeGen/X86/block-placement.ll index fc7b6383b8..5534712af8 100644 --- a/test/CodeGen/X86/block-placement.ll +++ b/test/CodeGen/X86/block-placement.ll @@ -7,10 +7,15 @@ define i32 @test_ifchains(i32 %i, i32* %a, i32 %b) { ; that is not expected to run. ; CHECK: test_ifchains: ; CHECK: %entry +; CHECK-NOT: .align ; CHECK: %else1 +; CHECK-NOT: .align ; CHECK: %else2 +; CHECK-NOT: .align ; CHECK: %else3 +; CHECK-NOT: .align ; CHECK: %else4 +; CHECK-NOT: .align ; CHECK: %exit ; CHECK: %then1 ; CHECK: %then2 @@ -76,8 +81,11 @@ define i32 @test_loop_cold_blocks(i32 %i, i32* %a) { ; Check that we sink cold loop blocks after the hot loop body. 
; CHECK: test_loop_cold_blocks: ; CHECK: %entry +; CHECK-NOT: .align ; CHECK: %unlikely1 +; CHECK-NOT: .align ; CHECK: %unlikely2 +; CHECK: .align ; CHECK: %body1 ; CHECK: %body2 ; CHECK: %body3 @@ -634,7 +642,7 @@ define void @test_unnatural_cfg_backwards_inner_loop() { ; ; CHECK: test_unnatural_cfg_backwards_inner_loop ; CHECK: %entry -; CHECK: %body +; CHECK: [[BODY:# BB#[0-9]+]]: ; CHECK: %loop2b ; CHECK: %loop1 ; CHECK: %loop2a diff --git a/test/CodeGen/X86/bool-simplify.ll b/test/CodeGen/X86/bool-simplify.ll new file mode 100644 index 0000000000..0cb9fd9bc5 --- /dev/null +++ b/test/CodeGen/X86/bool-simplify.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -march=x86-64 -mattr=+sse41,-avx | FileCheck %s + +define i32 @foo(<2 x i64> %c, i32 %a, i32 %b) { + %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) + %t2 = icmp ne i32 %t1, 0 + %t3 = select i1 %t2, i32 %a, i32 %b + ret i32 %t3 +; CHECK: foo +; CHECK: ptest +; CHECK-NOT: testl +; CHECK: cmov +; CHECK: ret +} + +define i32 @bar(<2 x i64> %c) { +entry: + %0 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) + %1 = icmp ne i32 %0, 0 + br i1 %1, label %if-true-block, label %endif-block +if-true-block: ; preds = %entry + ret i32 0 +endif-block: ; preds = %entry, + ret i32 1 +; CHECK: bar +; CHECK: ptest +; CHECK-NOT: testl +; CHECK: jne +; CHECK: ret +} + +define i32 @bax(<2 x i64> %c) { + %t1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %c, <2 x i64> %c) + %t2 = icmp eq i32 %t1, 1 + %t3 = zext i1 %t2 to i32 + ret i32 %t3 +; CHECK: bax +; CHECK: ptest +; CHECK-NOT: cmpl +; CHECK: ret +} + +declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone diff --git a/test/CodeGen/X86/br-fold.ll b/test/CodeGen/X86/br-fold.ll index 2c37194938..5223463011 100644 --- a/test/CodeGen/X86/br-fold.ll +++ b/test/CodeGen/X86/br-fold.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=x86-64 < %s | FileCheck %s ; CHECK: orq -; CHECK-NEXT: LBB0_1 +; CHECK-NEXT: %bb8.i329 
@_ZN11xercesc_2_513SchemaSymbols21fgURI_SCHEMAFORSCHEMAE = external constant [33 x i16], align 32 ; <[33 x i16]*> [#uses=1] @_ZN11xercesc_2_56XMLUni16fgNotationStringE = external constant [9 x i16], align 16 ; <[9 x i16]*> [#uses=1] diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index 3e65867143..4d801891da 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -34,8 +34,7 @@ entry: define double @squirt(double* %x) nounwind { entry: ; CHECK: squirt: -; CHECK: movsd ([[A0]]), %xmm0 -; CHECK: sqrtsd %xmm0, %xmm0 +; CHECK: sqrtsd ([[A0]]), %xmm0 %z = load double* %x %t = call double @llvm.sqrt.f64(double %z) ret double %t } diff --git a/test/CodeGen/X86/cfstring.ll b/test/CodeGen/X86/cfstring.ll index 7420ce7304..8cdd59e9ae 100644 --- a/test/CodeGen/X86/cfstring.ll +++ b/test/CodeGen/X86/cfstring.ll @@ -4,7 +4,7 @@ %0 = type opaque %struct.NSConstantString = type { i32*, i32, i8*, i32 } -; Make sure that the string ends up the the correct section. +; Make sure that the string ends up in the correct section. 
; CHECK: .section __TEXT,__cstring ; CHECK-NEXT: l_.str3: diff --git a/test/CodeGen/X86/cmov.ll b/test/CodeGen/X86/cmov.ll index 43beac0b6f..ed25c82fdd 100644 --- a/test/CodeGen/X86/cmov.ll +++ b/test/CodeGen/X86/cmov.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK: test1: -; CHECK: btl -; CHECK-NEXT: movl $12, %eax +; CHECK: movl $12, %eax +; CHECK-NEXT: btl ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret @@ -19,8 +19,8 @@ entry: define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK: test2: -; CHECK: btl -; CHECK-NEXT: movl $12, %eax +; CHECK: movl $12, %eax +; CHECK-NEXT: btl ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret diff --git a/test/CodeGen/X86/cmp.ll b/test/CodeGen/X86/cmp.ll index ef5e353e9f..eb06327f55 100644 --- a/test/CodeGen/X86/cmp.ll +++ b/test/CodeGen/X86/cmp.ll @@ -90,3 +90,64 @@ F: ; CHECK: encoding: [0x48,0x83,0x7c,0x24,0xf8,0x00] } +; rdar://11866926 +define i32 @test7(i64 %res) nounwind { +entry: +; CHECK: test7: +; CHECK-NOT: movabsq +; CHECK: shrq $32, %rdi +; CHECK: testq %rdi, %rdi +; CHECK: sete + %lnot = icmp ult i64 %res, 4294967296 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +define i32 @test8(i64 %res) nounwind { +entry: +; CHECK: test8: +; CHECK-NOT: movabsq +; CHECK: shrq $32, %rdi +; CHECK: cmpq $3, %rdi + %lnot = icmp ult i64 %res, 12884901888 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +define i32 @test9(i64 %res) nounwind { +entry: +; CHECK: test9: +; CHECK-NOT: movabsq +; CHECK: shrq $33, %rdi +; CHECK: testq %rdi, %rdi +; CHECK: sete + %lnot = icmp ult i64 %res, 8589934592 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +define i32 @test10(i64 %res) nounwind { +entry: +; CHECK: test10: +; CHECK-NOT: movabsq +; CHECK: shrq $32, %rdi +; CHECK: testq %rdi, %rdi +; CHECK: setne + %lnot = icmp uge i64 %res, 
4294967296 + %lnot.ext = zext i1 %lnot to i32 + ret i32 %lnot.ext +} + +; rdar://9758774 +define i32 @test11(i64 %l) nounwind { +entry: +; CHECK: test11: +; CHECK-NOT: movabsq +; CHECK-NOT: andq +; CHECK: shrq $47, %rdi +; CHECK: cmpq $1, %rdi + %shr.mask = and i64 %l, -140737488355328 + %cmp = icmp eq i64 %shr.mask, 140737488355328 + %conv = zext i1 %cmp to i32 + ret i32 %conv +} diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll index f979945835..26318dd6c5 100644 --- a/test/CodeGen/X86/constant-pool-sharing.ll +++ b/test/CodeGen/X86/constant-pool-sharing.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s ; llc should share constant pool entries between this integer vector ; and this floating-point vector since they have the same encoding. diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll index c71c6ec81d..9badfc82e9 100644 --- a/test/CodeGen/X86/crash.ll +++ b/test/CodeGen/X86/crash.ll @@ -426,3 +426,19 @@ while.end: ; preds = %if.then.i256 return: ; preds = %entry ret i64 -131 } + +; The tail call to a varargs function sets %AL. +; uitofp expands to an FCMOV instruction which splits the basic block. +; Make sure the live range of %AL isn't split. +@.str = private unnamed_addr constant { [1 x i8], [63 x i8] } zeroinitializer, align 32 +define void @pr13188(i64* nocapture %this) uwtable ssp address_safety align 2 { +entry: + %x7 = load i64* %this, align 8 + %sub = add i64 %x7, -1 + %conv = uitofp i64 %sub to float + %div = fmul float %conv, 5.000000e-01 + %conv2 = fpext float %div to double + tail call void (...)* @_Z6PrintFz(i8* getelementptr inbounds ({ [1 x i8], [63 x i8] }* @.str, i64 0, i32 0, i64 0), double %conv2) + ret void +} +declare void @_Z6PrintFz(...) 
diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll index 6406cc73e4..0a3dfca228 100644 --- a/test/CodeGen/X86/ctpop-combine.ll +++ b/test/CodeGen/X86/ctpop-combine.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=x86-64 < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s declare i64 @llvm.ctpop.i64(i64) nounwind readnone diff --git a/test/CodeGen/X86/dbg-merge-loc-entry.ll b/test/CodeGen/X86/dbg-merge-loc-entry.ll index c35935f015..d1e349f79d 100644 --- a/test/CodeGen/X86/dbg-merge-loc-entry.ll +++ b/test/CodeGen/X86/dbg-merge-loc-entry.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-darwin8" ;CHECK-NEXT: .short Lset ;CHECK-NEXT: Ltmp ;CHECK-NEXT: .byte 85 ## DW_OP_reg5 -;CHECK-NEXT: Ltmp5 +;CHECK-NEXT: Ltmp ;CHECK-NEXT: .quad 0 ;CHECK-NEXT: .quad 0 diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll index e577ecb85a..8e7c13d8ef 100644 --- a/test/CodeGen/X86/divide-by-constant.ll +++ b/test/CodeGen/X86/divide-by-constant.ll @@ -71,3 +71,24 @@ define i32 @test7(i32 %x) nounwind { ; CHECK-NOT: shrl ; CHECK: ret } + +; PR13326 +define i8 @test8(i8 %x) nounwind { + %div = udiv i8 %x, 78 + ret i8 %div +; CHECK: test8: +; CHECK: shrb % +; CHECK: imull $211 +; CHECK: shrl $13 +; CHECK: ret +} + +define i8 @test9(i8 %x) nounwind { + %div = udiv i8 %x, 116 + ret i8 %div +; CHECK: test9: +; CHECK: shrb $2 +; CHECK: imull $71 +; CHECK: shrl $11 +; CHECK: ret +} diff --git a/test/CodeGen/X86/dynamic-allocas-VLAs.ll b/test/CodeGen/X86/dynamic-allocas-VLAs.ll new file mode 100644 index 0000000000..c5e47facf3 --- /dev/null +++ b/test/CodeGen/X86/dynamic-allocas-VLAs.ll @@ -0,0 +1,237 @@ +; RUN: llc < %s -mcpu=generic -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s -check-prefix=FORCE-ALIGN +; rdar://11496434 + +; no VLAs or 
dynamic alignment +define i32 @t1() nounwind uwtable ssp { +entry: + %a = alloca i32, align 4 + call void @t1_helper(i32* %a) nounwind + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 13 + ret i32 %add + +; CHECK: _t1 +; CHECK-NOT: andq $-{{[0-9]+}}, %rsp +; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi +; CHECK: callq _t1_helper +; CHECK: movl [[OFFSET]](%rsp), %eax +; CHECK: addl $13, %eax +} + +declare void @t1_helper(i32*) + +; dynamic realignment +define i32 @t2() nounwind uwtable ssp { +entry: + %a = alloca i32, align 4 + %v = alloca <8 x float>, align 32 + call void @t2_helper(i32* %a, <8 x float>* %v) nounwind + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 13 + ret i32 %add + +; CHECK: _t2 +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: andq $-32, %rsp +; CHECK: subq ${{[0-9]+}}, %rsp +; +; CHECK: leaq {{[0-9]*}}(%rsp), %rdi +; CHECK: leaq {{[0-9]*}}(%rsp), %rsi +; CHECK: callq _t2_helper +; +; CHECK: movq %rbp, %rsp +; CHECK: popq %rbp +} + +declare void @t2_helper(i32*, <8 x float>*) + +; VLAs +define i32 @t3(i64 %sz) nounwind uwtable ssp { +entry: + %a = alloca i32, align 4 + %vla = alloca i32, i64 %sz, align 16 + call void @t3_helper(i32* %a, i32* %vla) nounwind + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 13 + ret i32 %add + +; CHECK: _t3 +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: pushq %rbx +; CHECK-NOT: andq $-{{[0-9]+}}, %rsp +; CHECK: subq ${{[0-9]+}}, %rsp +; +; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp +; CHECK: popq %rbx +; CHECK: popq %rbp +} + +declare void @t3_helper(i32*, i32*) + +; VLAs + Dynamic realignment +define i32 @t4(i64 %sz) nounwind uwtable ssp { +entry: + %a = alloca i32, align 4 + %v = alloca <8 x float>, align 32 + %vla = alloca i32, i64 %sz, align 16 + call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 13 + ret i32 %add + +; CHECK: _t4 +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: pushq %r14 +; CHECK: pushq %rbx +; 
CHECK: andq $-32, %rsp +; CHECK: subq ${{[0-9]+}}, %rsp +; CHECK: movq %rsp, %rbx +; +; CHECK: leaq {{[0-9]*}}(%rbx), %rdi +; CHECK: leaq {{[0-9]*}}(%rbx), %rdx +; CHECK: callq _t4_helper +; +; CHECK: leaq -16(%rbp), %rsp +; CHECK: popq %rbx +; CHECK: popq %r14 +; CHECK: popq %rbp +} + +declare void @t4_helper(i32*, i32*, <8 x float>*) + +; Dynamic realignment + Spill +define i32 @t5(float* nocapture %f) nounwind uwtable ssp { +entry: + %a = alloca i32, align 4 + %0 = bitcast float* %f to <8 x float>* + %1 = load <8 x float>* %0, align 32 + call void @t5_helper1(i32* %a) nounwind + call void @t5_helper2(<8 x float> %1) nounwind + %2 = load i32* %a, align 4 + %add = add nsw i32 %2, 13 + ret i32 %add + +; CHECK: _t5 +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: andq $-32, %rsp +; CHECK: subq ${{[0-9]+}}, %rsp +; +; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]] +; CHECK: vmovaps [[AVXREG]], (%rsp) +; CHECK: leaq {{[0-9]+}}(%rsp), %rdi +; CHECK: callq _t5_helper1 +; CHECK: vmovaps (%rsp), %ymm0 +; CHECK: callq _t5_helper2 +; CHECK: movl {{[0-9]+}}(%rsp), %eax +; +; CHECK: movq %rbp, %rsp +; CHECK: popq %rbp +} + +declare void @t5_helper1(i32*) + +declare void @t5_helper2(<8 x float>) + +; VLAs + Dynamic realignment + Spill +; FIXME: RA has already reserved RBX, so we can't do dynamic realignment. +define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp { +entry: +; CHECK: _t6 + %a = alloca i32, align 4 + %0 = bitcast float* %f to <8 x float>* + %1 = load <8 x float>* %0, align 32 + %vla = alloca i32, i64 %sz, align 16 + call void @t6_helper1(i32* %a, i32* %vla) nounwind + call void @t6_helper2(<8 x float> %1) nounwind + %2 = load i32* %a, align 4 + %add = add nsw i32 %2, 13 + ret i32 %add +} + +declare void @t6_helper1(i32*, i32*) + +declare void @t6_helper2(<8 x float>) + +; VLAs + Dynamic realignment + byval +; The byval adjusts the sp after the prolog, but if we're restoring the sp from +; the base pointer we use the original adjustment. 
+%struct.struct_t = type { [5 x i32] } + +define void @t7(i32 %size, %struct.struct_t* byval align 8 %arg1) nounwind uwtable { +entry: + %x = alloca i32, align 32 + store i32 0, i32* %x, align 32 + %0 = zext i32 %size to i64 + %vla = alloca i32, i64 %0, align 16 + %1 = load i32* %x, align 32 + call void @bar(i32 %1, i32* %vla, %struct.struct_t* byval align 8 %arg1) + ret void + +; CHECK: _t7 +; CHECK: pushq %rbp +; CHECK: movq %rsp, %rbp +; CHECK: pushq %rbx +; CHECK: andq $-32, %rsp +; CHECK: subq ${{[0-9]+}}, %rsp +; CHECK: movq %rsp, %rbx + +; Stack adjustment for byval +; CHECK: subq {{.*}}, %rsp +; CHECK: callq _bar +; CHECK-NOT: addq {{.*}}, %rsp +; CHECK: leaq -8(%rbp), %rsp +; CHECK: popq %rbx +; CHECK: popq %rbp +} + +declare i8* @llvm.stacksave() nounwind + +declare void @bar(i32, i32*, %struct.struct_t* byval align 8) + +declare void @llvm.stackrestore(i8*) nounwind + + +; Test when forcing stack alignment +define i32 @t8() nounwind uwtable { +entry: + %a = alloca i32, align 4 + call void @t1_helper(i32* %a) nounwind + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 13 + ret i32 %add + +; FORCE-ALIGN: _t8 +; FORCE-ALIGN: movq %rsp, %rbp +; FORCE-ALIGN: andq $-32, %rsp +; FORCE-ALIGN-NEXT: subq $32, %rsp +; FORCE-ALIGN: movq %rbp, %rsp +; FORCE-ALIGN: popq %rbp +} + +; VLAs +define i32 @t9(i64 %sz) nounwind uwtable { +entry: + %a = alloca i32, align 4 + %vla = alloca i32, i64 %sz, align 16 + call void @t3_helper(i32* %a, i32* %vla) nounwind + %0 = load i32* %a, align 4 + %add = add nsw i32 %0, 13 + ret i32 %add + +; FORCE-ALIGN: _t9 +; FORCE-ALIGN: pushq %rbp +; FORCE-ALIGN: movq %rsp, %rbp +; FORCE-ALIGN: pushq %rbx +; FORCE-ALIGN: andq $-32, %rsp +; FORCE-ALIGN: subq $32, %rsp +; FORCE-ALIGN: movq %rsp, %rbx + +; FORCE-ALIGN: leaq -8(%rbp), %rsp +; FORCE-ALIGN: popq %rbx +; FORCE-ALIGN: popq %rbp +} diff --git a/test/CodeGen/X86/early-ifcvt.ll b/test/CodeGen/X86/early-ifcvt.ll new file mode 100644 index 0000000000..7883ffabd5 --- /dev/null +++ 
b/test/CodeGen/X86/early-ifcvt.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -enable-early-ifcvt -stress-early-ifcvt | FileCheck %s +target triple = "x86_64-apple-macosx10.8.0" + +; CHECK: mm2 +define i32 @mm2(i32* nocapture %p, i32 %n) nounwind uwtable readonly ssp { +entry: + br label %do.body + +; CHECK: do.body +; Loop body has no branches before the backedge. +; CHECK-NOT: LBB +do.body: + %max.0 = phi i32 [ 0, %entry ], [ %max.1, %do.cond ] + %min.0 = phi i32 [ 0, %entry ], [ %min.1, %do.cond ] + %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %do.cond ] + %p.addr.0 = phi i32* [ %p, %entry ], [ %incdec.ptr, %do.cond ] + %incdec.ptr = getelementptr inbounds i32* %p.addr.0, i64 1 + %0 = load i32* %p.addr.0, align 4 + %cmp = icmp sgt i32 %0, %max.0 + br i1 %cmp, label %do.cond, label %if.else + +if.else: + %cmp1 = icmp slt i32 %0, %min.0 + %.min.0 = select i1 %cmp1, i32 %0, i32 %min.0 + br label %do.cond + +do.cond: + %max.1 = phi i32 [ %0, %do.body ], [ %max.0, %if.else ] + %min.1 = phi i32 [ %min.0, %do.body ], [ %.min.0, %if.else ] +; CHECK: decl %esi +; CHECK: jne LBB + %dec = add i32 %n.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: + %sub = sub nsw i32 %max.1, %min.1 + ret i32 %sub +} + +; CHECK: multipreds +; Deal with alternative tail predecessors +; CHECK-NOT: LBB +; CHECK: cmov +; CHECK-NOT: LBB +; CHECK: cmov +; CHECK-NOT: LBB +; CHECK: fprintf + +define void @multipreds(i32 %sw) nounwind uwtable ssp { +entry: + switch i32 %sw, label %if.then29 [ + i32 0, label %if.then37 + i32 127, label %if.end41 + ] + +if.then29: + br label %if.end41 + +if.then37: + br label %if.end41 + +if.end41: + %exit_status.0 = phi i32 [ 2, %if.then29 ], [ 0, %if.then37 ], [ 66, %entry ] + call void (...)* @fprintf(i32 %exit_status.0) nounwind + unreachable +} + +declare void @fprintf(...) 
nounwind diff --git a/test/CodeGen/X86/epilogue.ll b/test/CodeGen/X86/epilogue.ll index 7ab10a5886..090680e48f 100644 --- a/test/CodeGen/X86/epilogue.ll +++ b/test/CodeGen/X86/epilogue.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -mcpu=generic -march=x86 | not grep lea -; RUN: llc < %s -mcpu=generic -march=x86 | grep "movl %ebp" +; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s + +; CHECK-NOT: lea{{.*}}(%esp) +; CHECK: {{(mov.* %ebp, %esp)|(lea.*\(%ebp\), %esp)}} declare void @bar(<2 x i64>* %n) diff --git a/test/CodeGen/X86/fabs.ll b/test/CodeGen/X86/fabs.ll index 9ded7e05dc..af1867fc51 100644 --- a/test/CodeGen/X86/fabs.ll +++ b/test/CodeGen/X86/fabs.ll @@ -1,28 +1,54 @@ ; Make sure this testcase codegens to the fabs instruction, not a call to fabsf -; RUN: llc < %s -march=x86 -mattr=-sse2,-sse3,-sse | grep fabs\$ | \ -; RUN: count 2 -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | \ -; RUN: grep fabs\$ | count 3 +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse2,-sse3,-sse | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=UNSAFE +; RUN: llc < %s -mtriple=x86_64-apple-macosx -O0 | FileCheck %s --check-prefix=NOOPT declare float @fabsf(float) declare x86_fp80 @fabsl(x86_fp80) +; CHECK: test1: +; UNSAFE: test1: +; NOOPT: test1: define float @test1(float %X) { - %Y = call float @fabsf(float %X) + %Y = call float @fabsf(float %X) readnone ret float %Y } +; CHECK: {{^[ \t]+fabs$}} +; UNSAFE: {{^[ \t]+fabs$}} +; CHECK-NOT: fabs +; UNSAFE-NOT: fabs +; NOOPT-NOT: fabsf + +; CHECK: test2: +; UNSAFE: test2: +; NOOPT: test2: define double @test2(double %X) { %Y = fcmp oge double %X, -0.0 %Z = fsub double -0.0, %X %Q = select i1 %Y, double %X, double %Z ret double %Q } +; fabs is not used here. 
+; CHECK-NOT: fabs +; NOOPT-NOT: fabs + +; UNSAFE: {{^[ \t]+fabs$}} +; UNSAFE-NOT: fabs + +; CHECK: test3: +; UNSAFE: test3: +; NOOPT: test3: define x86_fp80 @test3(x86_fp80 %X) { - %Y = call x86_fp80 @fabsl(x86_fp80 %X) + %Y = call x86_fp80 @fabsl(x86_fp80 %X) readnone ret x86_fp80 %Y } +; CHECK: {{^[ \t]+fabs$}} +; UNSAFE: {{^[ \t]+fabs$}} +; NOOPT: {{^[ \t]+fabs$}} - +; CHECK-NOT: fabs +; UNSAFE-NOT: fabs +; NOOPT-NOT: fabs diff --git a/test/CodeGen/X86/fast-isel-mem.ll b/test/CodeGen/X86/fast-isel-mem.ll index 8db1936bc2..52b1e85643 100644 --- a/test/CodeGen/X86/fast-isel-mem.ll +++ b/test/CodeGen/X86/fast-isel-mem.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin | FileCheck %s +; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=generic | FileCheck %s +; RUN: llc < %s -fast-isel -mtriple=i386-apple-darwin -mcpu=atom | FileCheck -check-prefix=ATOM %s @src = external global i32 @@ -18,6 +19,13 @@ entry: ; CHECK: movl %eax, (%ecx) ; CHECK: ret +; ATOM: loadgv: +; ATOM: movl L_src$non_lazy_ptr, %ecx +; ATOM: movl (%ecx), %eax +; ATOM: addl (%ecx), %eax +; ATOM: movl %eax, (%ecx) +; ATOM: ret + } %stuff = type { i32 (...)** } @@ -31,4 +39,8 @@ entry: ; CHECK: movl $0, %eax ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx +; ATOM: _t: +; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx +; ATOM: movl $0, %eax + } diff --git a/test/CodeGen/X86/fast-isel-x86.ll b/test/CodeGen/X86/fast-isel-x86.ll index 19f38882a6..4caa3a039d 100644 --- a/test/CodeGen/X86/fast-isel-x86.ll +++ b/test/CodeGen/X86/fast-isel-x86.ll @@ -57,6 +57,6 @@ entry: ; CHECK: subl $28 ; CHECK: leal (%esp), %ecx ; CHECK: calll _test4fastccsret -; CHECK addl $28 +; CHECK: addl $28 } declare fastcc void @test4fastccsret(%struct.a* sret) diff --git a/test/CodeGen/X86/fast-isel.ll b/test/CodeGen/X86/fast-isel.ll index 9d9a520c6a..132df2b0ab 100644 --- a/test/CodeGen/X86/fast-isel.ll +++ b/test/CodeGen/X86/fast-isel.ll @@ -117,3 +117,11 @@ define i64* @life() nounwind { ret i64* %a3 } 
+declare void @llvm.donothing() readnone + +; CHECK: donada +define void @donada() nounwind { +entry: + call void @llvm.donothing() + ret void +} diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll index 5deedb9dd9..b0c1d0a0dd 100644 --- a/test/CodeGen/X86/fma.ll +++ b/test/CodeGen/X86/fma.ll @@ -1,8 +1,11 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=i386-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mattr=+fma | FileCheck %s --check-prefix=CHECK-FMA-INST +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s --check-prefix=CHECK-FMA-CALL ; CHECK: test_f32 -; CHECK: _fmaf +; CHECK-FMA-INST: vfmadd213ss +; CHECK-FMA-CALL: _fmaf define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp { entry: @@ -11,7 +14,8 @@ entry: } ; CHECK: test_f64 -; CHECK: _fma +; CHECK-FMA-INST: vfmadd213sd +; CHECK-FMA-CALL: _fma define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp { entry: diff --git a/test/CodeGen/X86/fma3-intrinsics.ll b/test/CodeGen/X86/fma3-intrinsics.ll index 8659dfe326..90529e09d7 100755 --- a/test/CodeGen/X86/fma3-intrinsics.ll +++ b/test/CodeGen/X86/fma3-intrinsics.ll @@ -1,42 +1,42 @@ ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 -mattr=avx2,+fma | FileCheck %s define <4 x float> @test_x86_fmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmadd132ss %xmm + ; CHECK: fmadd213ss %xmm %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - 
; CHECK: fmadd132ps + ; CHECK: fmadd213ps %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { - ; CHECK: fmadd132ps {{.*\(%r.*}}, %ymm + ; CHECK: fmadd213ps {{.*\(%r.*}}, %ymm %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind ret <8 x float> %res } declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone define <4 x float> @test_x86_fnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmadd132ss %xmm + ; CHECK: fnmadd213ss %xmm %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmadd132ps + ; CHECK: fnmadd213ps %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { - ; CHECK: fnmadd132ps {{.*\(%r.*}}, %ymm + ; CHECK: fnmadd213ps {{.*\(%r.*}}, %ymm %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) nounwind ret <8 x float> %res } @@ -44,28 +44,28 @@ declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x define <4 x float> @test_x86_fmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmsub132ss + ; CHECK: fmsub213ss %res = 
call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fmsub132ps + ; CHECK: fmsub213ps %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmsub132ss + ; CHECK: fnmsub213ss %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { - ; CHECK: fnmsub132ps + ; CHECK: fnmsub213ps %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) nounwind ret <4 x float> %res } @@ -74,28 +74,28 @@ declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x floa ;;;; define <2 x double> @test_x86_fmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmadd132sd + ; CHECK: fmadd213sd %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmadd132pd + ; CHECK: fmadd213pd %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } 
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmadd132sd + ; CHECK: fnmadd213sd %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmadd132pd + ; CHECK: fnmadd213pd %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } @@ -104,28 +104,28 @@ declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x d define <2 x double> @test_x86_fmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmsub132sd + ; CHECK: fmsub213sd %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fmsub132pd + ; CHECK: fmsub213pd %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone define <2 x double> @test_x86_fnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmsub132sd + ; CHECK: fnmsub213sd %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) 
nounwind readnone define <2 x double> @test_x86_fnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { - ; CHECK: fnmsub132pd + ; CHECK: fnmsub213pd %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) nounwind ret <2 x double> %res } diff --git a/test/CodeGen/X86/fma_patterns.ll b/test/CodeGen/X86/fma_patterns.ll new file mode 100644 index 0000000000..5d97a87b3b --- /dev/null +++ b/test/CodeGen/X86/fma_patterns.ll @@ -0,0 +1,139 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s + +; CHECK: test_x86_fmadd_ps +; CHECK: vfmadd213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %res = fadd <4 x float> %x, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fmsub_ps +; CHECK: fmsub213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %res = fsub <4 x float> %x, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fnmadd_ps +; CHECK: fnmadd213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %res = fsub <4 x float> %a2, %x + ret <4 x float> %res +} + +; CHECK: test_x86_fnmsub_ps +; CHECK: fnmsub213ps %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <4 x float> @test_x86_fnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { + %x = fmul <4 x float> %a0, %a1 + %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x + %res = fsub <4 x float> %y, %a2 + ret <4 x float> %res +} + +; CHECK: test_x86_fmadd_ps_y +; CHECK: vfmadd213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = 
fmul <8 x float> %a0, %a1 + %res = fadd <8 x float> %x, %a2 + ret <8 x float> %res +} + +; CHECK: test_x86_fmsub_ps_y +; CHECK: vfmsub213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %res = fsub <8 x float> %x, %a2 + ret <8 x float> %res +} + +; CHECK: test_x86_fnmadd_ps_y +; CHECK: vfnmadd213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fnmadd_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %res = fsub <8 x float> %a2, %x + ret <8 x float> %res +} + +; CHECK: test_x86_fnmsub_ps_y +; CHECK: vfnmsub213ps %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <8 x float> @test_x86_fnmsub_ps_y(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) { + %x = fmul <8 x float> %a0, %a1 + %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x + %res = fsub <8 x float> %y, %a2 + ret <8 x float> %res +} + +; CHECK: test_x86_fmadd_pd_y +; CHECK: vfmadd213pd %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <4 x double> @test_x86_fmadd_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + %x = fmul <4 x double> %a0, %a1 + %res = fadd <4 x double> %x, %a2 + ret <4 x double> %res +} + +; CHECK: test_x86_fmsub_pd_y +; CHECK: vfmsub213pd %ymm2, %ymm0, %ymm1 +; CHECK: ret +define <4 x double> @test_x86_fmsub_pd_y(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) { + %x = fmul <4 x double> %a0, %a1 + %res = fsub <4 x double> %x, %a2 + ret <4 x double> %res +} + +; CHECK: test_x86_fmsub_pd +; CHECK: vfmsub213pd %xmm2, %xmm0, %xmm1 +; CHECK: ret +define <2 x double> @test_x86_fmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { + %x = fmul <2 x double> %a0, %a1 + %res = fsub <2 x double> %x, %a2 + ret <2 x double> %res +} + +; CHECK: test_x86_fnmadd_ss +; CHECK: 
vfnmadd213ss %xmm2, %xmm0, %xmm1 +; CHECK: ret +define float @test_x86_fnmadd_ss(float %a0, float %a1, float %a2) { + %x = fmul float %a0, %a1 + %res = fsub float %a2, %x + ret float %res +} + +; CHECK: test_x86_fnmadd_sd +; CHECK: vfnmadd213sd %xmm2, %xmm0, %xmm1 +; CHECK: ret +define double @test_x86_fnmadd_sd(double %a0, double %a1, double %a2) { + %x = fmul double %a0, %a1 + %res = fsub double %a2, %x + ret double %res +} + +; CHECK: test_x86_fmsub_sd +; CHECK: vfmsub213sd %xmm2, %xmm0, %xmm1 +; CHECK: ret +define double @test_x86_fmsub_sd(double %a0, double %a1, double %a2) { + %x = fmul double %a0, %a1 + %res = fsub double %x, %a2 + ret double %res +} + +; CHECK: test_x86_fnmsub_ss +; CHECK: vfnmsub213ss %xmm2, %xmm0, %xmm1 +; CHECK: ret +define float @test_x86_fnmsub_ss(float %a0, float %a1, float %a2) { + %x = fsub float -0.000000e+00, %a0 + %y = fmul float %x, %a1 + %res = fsub float %y, %a2 + ret float %res +} diff --git a/test/CodeGen/X86/fold-load.ll b/test/CodeGen/X86/fold-load.ll index e03cb7edb5..d8366654c0 100644 --- a/test/CodeGen/X86/fold-load.ll +++ b/test/CodeGen/X86/fold-load.ll @@ -45,3 +45,29 @@ L: } +; rdar://10554090 +; xor in exit block will be CSE'ed and load will be folded to xor in entry. 
+define i1 @test3(i32* %P, i32* %Q) nounwind { +; CHECK: test3: +; CHECK: movl 8(%esp), %eax +; CHECK: xorl (%eax), +; CHECK: j +; CHECK-NOT: xor +entry: + %0 = load i32* %P, align 4 + %1 = load i32* %Q, align 4 + %2 = xor i32 %0, %1 + %3 = and i32 %2, 89947 + %4 = icmp eq i32 %3, 0 + br i1 %4, label %exit, label %land.end + +exit: + %shr.i.i19 = xor i32 %1, %0 + %5 = and i32 %shr.i.i19, 3456789123 + %6 = icmp eq i32 %5, 0 + br label %land.end + +land.end: + %7 = phi i1 [ %6, %exit ], [ false, %entry ] + ret i1 %7 +} diff --git a/test/CodeGen/X86/fold-pcmpeqd-1.ll b/test/CodeGen/X86/fold-pcmpeqd-1.ll index cc4198d7ca..d850630a4d 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-1.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-1.ll @@ -1,11 +1,16 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 > %t -; RUN: grep pcmpeqd %t | count 1 -; RUN: grep xor %t | count 1 -; RUN: not grep LCP %t +; RUN: llc < %s -march=x86 -mattr=+sse2,-avx | FileCheck %s define <2 x double> @foo() nounwind { ret <2 x double> bitcast (<2 x i64><i64 -1, i64 -1> to <2 x double>) +; CHECK: foo: +; CHECK: pcmpeqd %xmm0, %xmm0 +; CHECK-NOT: %xmm +; CHECK: ret } define <2 x double> @bar() nounwind { ret <2 x double> bitcast (<2 x i64><i64 0, i64 0> to <2 x double>) +; CHECK: bar: +; CHECK: xorps %xmm0, %xmm0 +; CHECK-NOT: %xmm +; CHECK: ret } diff --git a/test/CodeGen/X86/force-align-stack-alloca.ll b/test/CodeGen/X86/force-align-stack-alloca.ll index 48f963f58e..2ada194f89 100644 --- a/test/CodeGen/X86/force-align-stack-alloca.ll +++ b/test/CodeGen/X86/force-align-stack-alloca.ll @@ -3,7 +3,7 @@ ; arbitrarily force alignment up to 32-bytes for i386 hoping that this will ; exceed any ABI provisions. 
; -; RUN: llc < %s -force-align-stack -stack-alignment=32 | FileCheck %s +; RUN: llc < %s -mcpu=generic -force-align-stack -stack-alignment=32 | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" target triple = "i386-unknown-linux-gnu" @@ -17,10 +17,15 @@ entry: define i64 @g(i32 %i) nounwind { ; CHECK: g: -; CHECK: pushl +; CHECK: pushl %ebp ; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: pushl -; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: pushl +; CHECK-NEXT: andl $-32, %esp +; CHECK-NEXT: subl $32, %esp +; +; Now setup the base pointer (%esi). +; CHECK-NEXT: movl %esp, %esi ; CHECK-NOT: {{[^ ,]*}}, %esp ; ; The next adjustment of the stack is due to the alloca. @@ -41,12 +46,14 @@ define i64 @g(i32 %i) nounwind { ; CHECK-NEXT: addl $32, %esp ; CHECK-NOT: {{[^ ,]*}}, %esp ; -; Finally we nede to restore %esp from %ebp, the alloca prevents us from -; restoring it directly. +; Restore %esp from %ebp (frame pointer) and subtract the size of +; zone with callee-saved registers to pop them. +; This is the state prior to stack realignment and the allocation of VLAs. 
; CHECK-NOT: popl -; CHECK: leal -4(%ebp), %esp +; CHECK: leal -8(%ebp), %esp ; CHECK-NEXT: popl ; CHECK-NEXT: popl +; CHECK-NEXT: popl %ebp ; CHECK-NEXT: ret entry: diff --git a/test/CodeGen/X86/fp-in-intregs.ll b/test/CodeGen/X86/fp-in-intregs.ll index 6966cf0497..1f5121d271 100644 --- a/test/CodeGen/X86/fp-in-intregs.ll +++ b/test/CodeGen/X86/fp-in-intregs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; RUN: llc < %s -mtriple=i686-apple-macosx -mcpu=yonah | FileCheck %s ; CHECK-NOT: {{((xor|and)ps|movd)}} ; These operations should be done in integer registers, eliminating constant diff --git a/test/CodeGen/X86/full-lsr.ll b/test/CodeGen/X86/full-lsr.ll index 655ab29127..0729dda4a1 100644 --- a/test/CodeGen/X86/full-lsr.ll +++ b/test/CodeGen/X86/full-lsr.ll @@ -1,9 +1,17 @@ -; RUN: llc < %s -march=x86 >%t - -; RUN: grep "addl \$4," %t | count 3 -; RUN: not grep ",%" %t +; RUN: llc < %s -march=x86 -mcpu=generic | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=atom | FileCheck -check-prefix=ATOM %s define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { +; ATOM: foo +; ATOM: addl +; ATOM: leal +; ATOM: leal + +; CHECK: foo +; CHECK: addl +; CHECK: addl +; CHECK: addl + entry: %0 = icmp sgt i32 %N, 0 ; <i1> [#uses=1] br i1 %0, label %bb, label %return diff --git a/test/CodeGen/X86/gs-fold.ll b/test/CodeGen/X86/gs-fold.ll new file mode 100644 index 0000000000..dbec76ba52 --- /dev/null +++ b/test/CodeGen/X86/gs-fold.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-freebsd | FileCheck %s --check-prefix=CHECK-FBSD +; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CHECK-LINUX +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +%struct.thread = type { i32, i32, i32, i32 } + +define i32 @test() nounwind uwtable { +entry: + %0 = load volatile 
%struct.thread* addrspace(256)* null + %c = getelementptr inbounds %struct.thread* %0, i64 0, i32 2 + %1 = load i32* %c, align 4 + ret i32 %1 +} + +; Check that we are not assuming that gs contains the address of gs if we are not targeting Linux +; CHECK-FBSD: movq %gs:0, %rax +; CHECK-FBSD: movl 8(%rax), %eax +; Check that we are assuming that gs contains the address of gs if we are targeting Linux +; CHECK-LINUX: movl %gs:8, %eax + diff --git a/test/CodeGen/X86/inreg.ll b/test/CodeGen/X86/inreg.ll new file mode 100644 index 0000000000..6653cfb14e --- /dev/null +++ b/test/CodeGen/X86/inreg.ll @@ -0,0 +1,46 @@ +; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 | FileCheck --check-prefix=DAG %s +; RUN: llc < %s -mtriple=i686-pc-linux -mcpu=corei7 -O0 | FileCheck --check-prefix=FAST %s + +%struct.s1 = type { double, float } + +define void @g1() nounwind { +entry: + %tmp = alloca %struct.s1, align 4 + call void @f(%struct.s1* inreg sret %tmp, i32 inreg 41, i32 inreg 42, i32 43) + ret void + ; DAG: g1: + ; DAG: subl $[[AMT:.*]], %esp + ; DAG-NEXT: $43, (%esp) + ; DAG-NEXT: leal 16(%esp), %eax + ; DAG-NEXT: movl $41, %edx + ; DAG-NEXT: movl $42, %ecx + ; DAG-NEXT: calll f + ; DAG-NEXT: addl $[[AMT]], %esp + ; DAG-NEXT: ret + + ; FAST: g1: + ; FAST: subl $[[AMT:.*]], %esp + ; FAST-NEXT: leal 8(%esp), %eax + ; FAST-NEXT: movl $41, %edx + ; FAST-NEXT: movl $42, %ecx + ; FAST: $43, (%esp) + ; FAST: calll f + ; FAST-NEXT: addl $[[AMT]], %esp + ; FAST: ret +} + +declare void @f(%struct.s1* inreg sret, i32 inreg, i32 inreg, i32) + +%struct.s2 = type {} + +define void @g2(%struct.s2* inreg sret %agg.result) nounwind { +entry: + ret void + ; DAG: g2 + ; DAG-NOT: ret $4 + ; DAG: .size g2 + + ; FAST: g2 + ; FAST-NOT: ret $4 + ; FAST: .size g2 +} diff --git a/test/CodeGen/X86/jump_sign.ll b/test/CodeGen/X86/jump_sign.ll index 1bdf49ab43..48e21061d2 100644 --- a/test/CodeGen/X86/jump_sign.ll +++ b/test/CodeGen/X86/jump_sign.ll @@ -83,6 +83,73 @@ entry: %cond = select i1 %cmp, i32 
%sub, i32 0 ret i32 %cond } +; redundant cmp instruction +define i32 @l(i32 %a, i32 %b) nounwind { +entry: +; CHECK: l: +; CHECK-NOT: cmp + %cmp = icmp slt i32 %b, %a + %sub = sub nsw i32 %a, %b + %cond = select i1 %cmp, i32 %sub, i32 %a + ret i32 %cond +} +define i32 @m(i32 %a, i32 %b) nounwind { +entry: +; CHECK: m: +; CHECK-NOT: cmp + %cmp = icmp sgt i32 %a, %b + %sub = sub nsw i32 %a, %b + %cond = select i1 %cmp, i32 %b, i32 %sub + ret i32 %cond +} +; If EFLAGS is live-out, we can't remove cmp if there exists +; a swapped sub. +define i32 @l2(i32 %a, i32 %b) nounwind { +entry: +; CHECK: l2: +; CHECK: cmp + %cmp = icmp eq i32 %b, %a + %sub = sub nsw i32 %a, %b + br i1 %cmp, label %if.then, label %if.else + +if.then: + %cmp2 = icmp sgt i32 %b, %a + %sel = select i1 %cmp2, i32 %sub, i32 %a + ret i32 %sel + +if.else: + ret i32 %sub +} +define i32 @l3(i32 %a, i32 %b) nounwind { +entry: +; CHECK: l3: +; CHECK: sub +; CHECK-NOT: cmp +; CHECK: jge + %cmp = icmp sgt i32 %b, %a + %sub = sub nsw i32 %a, %b + br i1 %cmp, label %if.then, label %if.else + +if.then: + ret i32 %sub + +if.else: + %add = add nsw i32 %sub, 1 + ret i32 %add +} +; rdar://11830760 +; When Movr0 is between sub and cmp, we need to move "Movr0" before sub. +define i32 @l4(i32 %a, i32 %b) nounwind { +entry: +; CHECK: l4: +; CHECK: xor +; CHECK: sub +; CHECK-NOT: cmp + %cmp = icmp sgt i32 %b, %a + %sub = sub i32 %a, %b + %.sub = select i1 %cmp, i32 0, i32 %sub + ret i32 %.sub +} ; rdar://11540023 define i32 @n(i32 %x, i32 %y) nounwind { entry: @@ -136,3 +203,53 @@ if.then.i103: ; preds = %if.then44 if.else.i104: ; preds = %if.then44 ret void } +; rdar://11855129 +define i32 @p(i32 %a, i32 %b) nounwind { +entry: +; CHECK: p: +; CHECK-NOT: test +; CHECK: cmovs + %add = add nsw i32 %b, %a + %cmp = icmp sgt i32 %add, 0 + %add. = select i1 %cmp, i32 %add, i32 0 + ret i32 %add. +} +; PR13475 +; If we have sub a, b and cmp b, a and the result of cmp is used +; by sbb, we should not optimize cmp away. 
+define i32 @q(i32 %j.4, i32 %w, i32 %el) { +; CHECK: q: +; CHECK: sub +; CHECK: cmp +; CHECK-NEXT: sbb + %tmp532 = add i32 %j.4, %w + %tmp533 = icmp ugt i32 %tmp532, %el + %tmp534 = icmp ult i32 %w, %el + %or.cond = and i1 %tmp533, %tmp534 + %tmp535 = sub i32 %el, %w + %j.5 = select i1 %or.cond, i32 %tmp535, i32 %j.4 + ret i32 %j.5 +} +; rdar://11873276 +define i8* @r(i8* %base, i32* nocapture %offset, i32 %size) nounwind { +entry: +; CHECK: r: +; CHECK: sub +; CHECK-NOT: cmp +; CHECK: j +; CHECK-NOT: sub +; CHECK: ret + %0 = load i32* %offset, align 8 + %cmp = icmp slt i32 %0, %size + br i1 %cmp, label %return, label %if.end + +if.end: + %sub = sub nsw i32 %0, %size + store i32 %sub, i32* %offset, align 8 + %add.ptr = getelementptr inbounds i8* %base, i32 %sub + br label %return + +return: + %retval.0 = phi i8* [ %add.ptr, %if.end ], [ null, %entry ] + ret i8* %retval.0 +} diff --git a/test/CodeGen/X86/loop-blocks.ll b/test/CodeGen/X86/loop-blocks.ll index d14102fe24..4bd162b452 100644 --- a/test/CodeGen/X86/loop-blocks.ll +++ b/test/CodeGen/X86/loop-blocks.ll @@ -41,7 +41,6 @@ done: ; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: callq bar99 -; CHECK-NEXT: align ; CHECK-NEXT: .LBB1_1: ; CHECK-NEXT: callq body @@ -79,7 +78,6 @@ exit: ; CHECK-NEXT: .LBB2_5: ; CHECK-NEXT: callq block_a_true_func ; CHECK-NEXT: callq block_a_merge_func -; CHECK-NEXT: align ; CHECK-NEXT: .LBB2_1: ; CHECK-NEXT: callq body ; @@ -139,13 +137,13 @@ exit: ; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_7: ; CHECK-NEXT: callq bar100 -; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_1: ; CHECK-NEXT: callq loop_header ; CHECK: jl .LBB3_7 ; CHECK: jge .LBB3_3 ; CHECK-NEXT: callq bar101 ; CHECK-NEXT: jmp .LBB3_1 +; CHECK-NEXT: align ; CHECK-NEXT: .LBB3_3: ; CHECK: jge .LBB3_4 ; CHECK-NEXT: callq bar102 diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index ebda9f201d..8a81f70a8a 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ 
b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -1,10 +1,16 @@ -; RUN: llc -mtriple=x86_64-darwin < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin -mcpu=generic < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s ; CHECK: t: ; CHECK: decq -; CHECK-NEXT: movl ( +; CHECK-NEXT: movl (%r9,%rax,4), %eax ; CHECK-NEXT: jne +; ATOM: t: +; ATOM: movl (%r9,%rax,4), %eax +; ATOM-NEXT: decq +; ATOM-NEXT: jne + @Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] @Te1 = external global [256 x i32] ; <[256 x i32]*> [#uses=4] @Te3 = external global [256 x i32] ; <[256 x i32]*> [#uses=2] @@ -149,6 +155,13 @@ bb2: ; preds = %bb ; CHECK: jne ; CHECK: ret +; ATOM: f: +; ATOM: %for.body +; ATOM: incl [[IV:%e..]] +; ATOM: cmpl $1, [[IV]] +; ATOM: jne +; ATOM: ret + define i32 @f(i32 %i, i32* nocapture %a) nounwind uwtable readonly ssp { entry: %cmp4 = icmp eq i32 %i, 1 diff --git a/test/CodeGen/X86/lsr-static-addr.ll b/test/CodeGen/X86/lsr-static-addr.ll index c9ed3e553a..6566f56378 100644 --- a/test/CodeGen/X86/lsr-static-addr.ll +++ b/test/CodeGen/X86/lsr-static-addr.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck %s +; RUN: llc -march=x86-64 -mcpu=atom -mtriple=x86_64-unknown-linux-gnu -relocation-model=static -asm-verbose=false < %s | FileCheck -check-prefix=ATOM %s ; CHECK: xorl %eax, %eax ; CHECK: movsd .LCPI0_0(%rip), %xmm0 @@ -9,6 +10,15 @@ ; CHECK-NEXT: movsd ; CHECK-NEXT: incq %rax +; ATOM: movsd .LCPI0_0(%rip), %xmm0 +; ATOM: xorl %eax, %eax +; ATOM: align +; ATOM-NEXT: BB0_2: +; ATOM-NEXT: movsd A(,%rax,8) +; ATOM-NEXT: mulsd +; ATOM-NEXT: movsd +; ATOM-NEXT: incq %rax + @A = external global [0 x double] define void @foo(i64 %n) nounwind { diff --git a/test/CodeGen/X86/machine-cse.ll 
b/test/CodeGen/X86/machine-cse.ll index a757cde6ab..d171fd5f1d 100644 --- a/test/CodeGen/X86/machine-cse.ll +++ b/test/CodeGen/X86/machine-cse.ll @@ -99,3 +99,60 @@ return: ; preds = %if.end, %entry %retval.0 = phi i32 [ 1, %entry ], [ %., %if.end ] ret i32 %retval.0 } + +; rdar://11393714 +define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp { +; CHECK: %entry +; CHECK: xorl +; CHECK: %preheader +; CHECK: %do.body +; CHECK-NOT: xorl +; CHECK: %do.cond +; CHECK-NOT: xorl +; CHECK: %return +entry: + %cmp = icmp eq i64 %n, 0 + br i1 %cmp, label %return, label %preheader + +preheader: + %conv2 = and i32 %c, 255 + br label %do.body + +do.body: + %n.addr.0 = phi i64 [ %dec, %do.cond ], [ %n, %preheader ] + %p.0 = phi i8* [ %incdec.ptr, %do.cond ], [ %s, %preheader ] + %cmp3 = icmp eq i32 %a, %conv2 + br i1 %cmp3, label %return, label %do.cond + +do.cond: + %incdec.ptr = getelementptr inbounds i8* %p.0, i64 1 + %dec = add i64 %n.addr.0, -1 + %cmp6 = icmp eq i64 %dec, 0 + br i1 %cmp6, label %return, label %do.body + +return: + %retval.0 = phi i8* [ null, %entry ], [ null, %do.cond ], [ %p.0, %do.body ] + ret i8* %retval.0 +} + +; PR13578 +@t2_global = external global i32 + +declare i1 @t2_func() + +define i32 @t2() { + store i32 42, i32* @t2_global + %c = call i1 @t2_func() + br i1 %c, label %a, label %b + +a: + %l = load i32* @t2_global + ret i32 %l + +b: + ret i32 0 + +; CHECK: t2: +; CHECK: t2_global@GOTPCREL(%rip) +; CHECK-NOT: t2_global@GOTPCREL(%rip) +} diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index f4bc1bb701..723d1d8942 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -disable-simplify-libcalls -mtriple=x86_64-linux | FileCheck %s --check-prefix=NOBUILTIN ; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; This tests codegen time inlining/optimization of memcmp @@ -23,6 +24,8 @@ return: ; preds = %entry ; 
CHECK: memcmp2: ; CHECK: movw ([[A0:%rdi|%rcx]]), %ax ; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax +; NOBUILTIN: memcmp2: +; NOBUILTIN: callq } define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind { diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll index 86c6862a53..39c7fbafd4 100644 --- a/test/CodeGen/X86/memcpy.ll +++ b/test/CodeGen/X86/memcpy.ll @@ -65,18 +65,18 @@ entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) ret void ; LINUX: test4: -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq -; LINUX movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq +; LINUX: movq } diff --git a/test/CodeGen/X86/mmx-punpckhdq.ll b/test/CodeGen/X86/mmx-punpckhdq.ll index 689f7bf595..206cb33494 100644 --- a/test/CodeGen/X86/mmx-punpckhdq.ll +++ b/test/CodeGen/X86/mmx-punpckhdq.ll @@ -3,7 +3,7 @@ define void @bork(<1 x i64>* %x) { ; CHECK: bork -; CHECK: pextrd +; CHECK: movlpd entry: %tmp2 = load <1 x i64>* %x ; <<1 x i64>> [#uses=1] %tmp6 = bitcast <1 x i64> %tmp2 to <2 x i32> ; <<2 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll index aeb540fe42..65ee7b1d8e 100644 --- a/test/CodeGen/X86/movgs.ll +++ b/test/CodeGen/X86/movgs.ll @@ -55,4 +55,20 @@ entry: ; X64: ret } +; The two loads here both look identical to selection DAG, except for their +; address spaces. Make sure they aren't CSE'd. 
+define i32 @test_no_cse() nounwind readonly { +entry: + %tmp = load i32* addrspace(256)* getelementptr (i32* addrspace(256)* inttoptr (i32 72 to i32* addrspace(256)*), i32 31) ; <i32*> [#uses=1] + %tmp1 = load i32* %tmp ; <i32> [#uses=1] + %tmp2 = load i32* addrspace(257)* getelementptr (i32* addrspace(257)* inttoptr (i32 72 to i32* addrspace(257)*), i32 31) ; <i32*> [#uses=1] + %tmp3 = load i32* %tmp2 ; <i32> [#uses=1] + %tmp4 = add i32 %tmp1, %tmp3 + ret i32 %tmp4 +} +; X32: test_no_cse: +; X32: movl %gs:196 +; X32: movl %fs:196 +; X32: ret + declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone diff --git a/test/CodeGen/X86/phielim-split.ll b/test/CodeGen/X86/phielim-split.ll new file mode 100644 index 0000000000..aa477359d6 --- /dev/null +++ b/test/CodeGen/X86/phielim-split.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -verify-machineinstrs | FileCheck %s +target triple = "x86_64-apple-macosx10.8.0" + +; The critical edge from for.cond to if.end2 should be split to avoid injecting +; copies into the loop. The use of %b after the loop causes interference that +; makes a copy necessary. 
+; <rdar://problem/11561842> +; +; CHECK: split_loop_exit +; CHECK: %for.cond +; CHECK-NOT: mov +; CHECK: je + +define i32 @split_loop_exit(i32 %a, i32 %b, i8* nocapture %p) nounwind uwtable readonly ssp { +entry: + %cmp = icmp sgt i32 %a, 10 + br i1 %cmp, label %for.cond, label %if.end2 + +for.cond: ; preds = %entry, %for.cond + %p.addr.0 = phi i8* [ %incdec.ptr, %for.cond ], [ %p, %entry ] + %incdec.ptr = getelementptr inbounds i8* %p.addr.0, i64 1 + %0 = load i8* %p.addr.0, align 1 + %tobool = icmp eq i8 %0, 0 + br i1 %tobool, label %for.cond, label %if.end2 + +if.end2: ; preds = %for.cond, %entry + %r.0 = phi i32 [ %a, %entry ], [ %b, %for.cond ] + %add = add nsw i32 %r.0, %b + ret i32 %add +} diff --git a/test/CodeGen/X86/phys-reg-local-regalloc.ll b/test/CodeGen/X86/phys-reg-local-regalloc.ll index c565684685..37eca1ce0a 100644 --- a/test/CodeGen/X86/phys-reg-local-regalloc.ll +++ b/test/CodeGen/X86/phys-reg-local-regalloc.ll @@ -1,6 +1,7 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast -optimize-regalloc=0 | FileCheck %s -; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -regalloc=fast | FileCheck %s -; CHECKed instructions should be the same with or without -O0. +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 | FileCheck %s +; RUN: llc -O0 < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast | FileCheck %s +; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 | FileCheck -check-prefix=ATOM %s +; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling. 
@.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] @@ -15,6 +16,19 @@ entry: ; CHECK: movl %ebx, 40(%esp) ; CHECK-NOT: movl ; CHECK: addl %ebx, %eax + +; On Intel Atom the scheduler moves a movl instruction +; used for the printf call to follow movl 24(%esp), %eax +; ATOM: movl 24(%esp), %eax +; ATOM: movl +; ATOM: movl %eax, 36(%esp) +; ATOM-NOT: movl +; ATOM: movl 28(%esp), %ebx +; ATOM-NOT: movl +; ATOM: movl %ebx, 40(%esp) +; ATOM-NOT: movl +; ATOM: addl %ebx, %eax + %retval = alloca i32 ; <i32*> [#uses=2] %"%ebx" = alloca i32 ; <i32*> [#uses=1] %"%eax" = alloca i32 ; <i32*> [#uses=2] diff --git a/test/CodeGen/X86/pointer-vector.ll b/test/CodeGen/X86/pointer-vector.ll index cc1df2fffc..800fbedb4f 100644 --- a/test/CodeGen/X86/pointer-vector.ll +++ b/test/CodeGen/X86/pointer-vector.ll @@ -105,8 +105,7 @@ define <2 x i32*> @BITCAST1(<2 x i8*>* %p) nounwind { entry: %G = load <2 x i8*>* %p ;CHECK: movl -;CHECK: movd -;CHECK: pinsrd +;CHECK: movsd %T = bitcast <2 x i8*> %G to <2 x i32*> ;CHECK: ret ret <2 x i32*> %T diff --git a/test/CodeGen/X86/pr11334.ll b/test/CodeGen/X86/pr11334.ll new file mode 100644 index 0000000000..e7e29e0d60 --- /dev/null +++ b/test/CodeGen/X86/pr11334.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX + +define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind { +entry: +; CHECK: v2f2d_ext_vec +; CHECK: cvtps2pd +; AVX: v2f2d_ext_vec +; AVX: vcvtps2pd + %f1 = fpext <2 x float> %v1 to <2 x double> + ret <2 x double> %f1 +} + +define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind { +entry: +; CHECK: v3f2d_ext_vec +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; AVX: v3f2d_ext_vec +; AVX: vcvtps2pd +; AVX: ret + %f1 = fpext <3 x float> %v1 to <3 x double> + ret <3 x double> %f1 +} + +define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind { +entry: 
+; CHECK: v4f2d_ext_vec +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; AVX: v4f2d_ext_vec +; AVX: vcvtps2pd +; AVX: ret + %f1 = fpext <4 x float> %v1 to <4 x double> + ret <4 x double> %f1 +} + +define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind { +entry: +; CHECK: v8f2d_ext_vec +; CHECK: cvtps2pd +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; CHECK: movhlps +; CHECK: cvtps2pd +; AVX: v8f2d_ext_vec +; AVX: vcvtps2pd +; AVX: vextractf128 +; AVX: vcvtps2pd +; AVX: ret + %f1 = fpext <8 x float> %v1 to <8 x double> + ret <8 x double> %f1 +} + +define void @test_vector_creation() nounwind { + %1 = insertelement <4 x double> undef, double 0.000000e+00, i32 2 + %2 = load double addrspace(1)* null + %3 = insertelement <4 x double> %1, double %2, i32 3 + store <4 x double> %3, <4 x double>* undef + ret void +} diff --git a/test/CodeGen/X86/pr11468.ll b/test/CodeGen/X86/pr11468.ll new file mode 100644 index 0000000000..f7e9adb4a2 --- /dev/null +++ b/test/CodeGen/X86/pr11468.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s +; PR11468 + +define void @f(i64 %sz) uwtable { +entry: + %a = alloca i32, align 32 + store volatile i32 0, i32* %a, align 32 + ; force to push r14 on stack + call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0 + ret void + +; CHECK: _f +; CHECK: pushq %rbp +; CHECK: .cfi_offset %rbp, -16 +; CHECK: movq %rsp, %rbp +; CHECK: .cfi_def_cfa_register %rbp + +; We first push register on stack, and then realign it, so that +; .cfi_offset value is correct +; CHECK: pushq %r14 +; CHECK: andq $-32, %rsp +; CHECK: .cfi_offset %r14, -24 + +; Restore %rsp from %rbp and subtract the total size of saved regsiters. +; CHECK: leaq -8(%rbp), %rsp + +; Pop saved registers. 
+; CHECK: popq %r14 +; CHECK: popq %rbp +} + +!0 = metadata !{i32 125} + diff --git a/test/CodeGen/X86/pr13209.ll b/test/CodeGen/X86/pr13209.ll new file mode 100644 index 0000000000..1c93163659 --- /dev/null +++ b/test/CodeGen/X86/pr13209.ll @@ -0,0 +1,74 @@ +; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s + +; CHECK: pr13209: +; CHECK-NOT: mov +; CHECK: .size pr13209 + +define zeroext i1 @pr13209(i8** %x, i8*** %jumpTable) nounwind { +if.end51: + br label %indirectgoto.preheader +indirectgoto.preheader: + %frombool.i5915.ph = phi i8 [ undef, %if.end51 ], [ %frombool.i5917, %jit_return ] + br label %indirectgoto +do.end165: + %tmp92 = load i8** %x, align 8 + br label %indirectgoto +do.end209: + %tmp104 = load i8** %x, align 8 + br label %indirectgoto +do.end220: + %tmp107 = load i8** %x, align 8 + br label %indirectgoto +do.end231: + %tmp110 = load i8** %x, align 8 + br label %indirectgoto +do.end242: + %tmp113 = load i8** %x, align 8 + br label %indirectgoto +do.end253: + %tmp116 = load i8** %x, align 8 + br label %indirectgoto +do.end286: + %tmp125 = load i8** %x, align 8 + br label %indirectgoto +do.end297: + %tmp128 = load i8** %x, align 8 + br label %indirectgoto +do.end308: + %tmp131 = load i8** %x, align 8 + br label %indirectgoto +do.end429: + %tmp164 = load i8** %x, align 8 + br label %indirectgoto +do.end440: + %tmp167 = load i8** %x, align 8 + br label %indirectgoto +do.body482: + br i1 false, label %indirectgoto, label %do.body495 +do.body495: + br label %indirectgoto +do.end723: + br label %inline_return +inline_return: + %frombool.i5917 = phi i8 [ 0, %if.end5571 ], [ %frombool.i5915, %do.end723 ] + br label %jit_return +jit_return: + br label %indirectgoto.preheader +L_JSOP_UINT24: + %tmp864 = load i8** %x, align 8 + br label %indirectgoto +L_JSOP_THROWING: + %tmp1201 = load i8** %x, align 8 + br label %indirectgoto +do.body4936: + %tmp1240 = load i8** %x, align 8 + br label %indirectgoto +do.body5184: + %tmp1340 = load i8** %x, align 8 + br 
label %indirectgoto +if.end5571: + br label %inline_return +indirectgoto: + %frombool.i5915 = phi i8 [ 0, %do.body495 ],[ 0, %do.body482 ] , [ %frombool.i5915, %do.body4936 ],[ %frombool.i5915, %do.body5184 ], [ %frombool.i5915, %L_JSOP_UINT24 ], [ %frombool.i5915, %do.end286 ], [ %frombool.i5915, %do.end297 ], [ %frombool.i5915, %do.end308 ], [ %frombool.i5915, %do.end429 ], [ %frombool.i5915, %do.end440 ], [ %frombool.i5915, %L_JSOP_THROWING ], [ %frombool.i5915, %do.end253 ], [ %frombool.i5915, %do.end242 ], [ %frombool.i5915, %do.end231 ], [ %frombool.i5915, %do.end220 ], [ %frombool.i5915, %do.end209 ],[ %frombool.i5915, %do.end165 ], [ %frombool.i5915.ph, %indirectgoto.preheader ] + indirectbr i8* null, [ label %if.end5571, label %do.end165, label %do.end209, label %do.end220, label %do.end231, label %do.end242, label %do.end253, label %do.end723, label %L_JSOP_THROWING, label %do.end440, label %do.end429, label %do.end308, label %do.end297, label %do.end286, label %L_JSOP_UINT24, label %do.body5184, label %do.body4936, label %do.body482] +} diff --git a/test/CodeGen/X86/pr13220.ll b/test/CodeGen/X86/pr13220.ll new file mode 100644 index 0000000000..b9ac4b63ec --- /dev/null +++ b/test/CodeGen/X86/pr13220.ll @@ -0,0 +1,20 @@ +; RUN: llc -march=x86 < %s +; PR13220 + +define <8 x i32> @foo(<8 x i96> %x) { + %a = lshr <8 x i96> %x, <i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1> + %b = trunc <8 x i96> %a to <8 x i32> + ret <8 x i32> %b +} + +define <8 x i32> @bar(<8 x i97> %x) { + %a = lshr <8 x i97> %x, <i97 1, i97 1, i97 1, i97 1, i97 1, i97 1, i97 1, i97 1> + %b = trunc <8 x i97> %a to <8 x i32> + ret <8 x i32> %b +} + +define <8 x i32> @bax() { + %a = lshr <8 x i96> <i96 4, i96 4, i96 4, i96 4, i96 4, i96 4, i96 4, i96 4>, <i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1, i96 1> + %b = trunc <8 x i96> %a to <8 x i32> + ret <8 x i32> %b +} diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll new file mode 100644 index 
0000000000..faaec262cb --- /dev/null +++ b/test/CodeGen/X86/pr13577.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -march=x86-64 + +define x86_fp80 @foo(x86_fp80 %a) { + %1 = tail call x86_fp80 @copysignl(x86_fp80 0xK7FFF8000000000000000, x86_fp80 %a) nounwind readnone + ret x86_fp80 %1 +} + +declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone diff --git a/test/CodeGen/X86/rdrand.ll b/test/CodeGen/X86/rdrand.ll new file mode 100644 index 0000000000..e2224a6196 --- /dev/null +++ b/test/CodeGen/X86/rdrand.ll @@ -0,0 +1,85 @@ +; RUN: llc < %s -march=x86-64 -mcpu=core-avx-i -mattr=+rdrand | FileCheck %s +declare {i16, i32} @llvm.x86.rdrand.16() +declare {i32, i32} @llvm.x86.rdrand.32() +declare {i64, i32} @llvm.x86.rdrand.64() + +define i32 @_rdrand16_step(i16* %random_val) { + %call = call {i16, i32} @llvm.x86.rdrand.16() + %randval = extractvalue {i16, i32} %call, 0 + store i16 %randval, i16* %random_val + %isvalid = extractvalue {i16, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdrand16_step: +; CHECK: rdrandw %ax +; CHECK: movw %ax, (%r[[A0:di|cx]]) +; CHECK: movzwl %ax, %ecx +; CHECK: movl $1, %eax +; CHECK: cmovael %ecx, %eax +; CHECK: ret +} + +define i32 @_rdrand32_step(i32* %random_val) { + %call = call {i32, i32} @llvm.x86.rdrand.32() + %randval = extractvalue {i32, i32} %call, 0 + store i32 %randval, i32* %random_val + %isvalid = extractvalue {i32, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdrand32_step: +; CHECK: rdrandl %e[[T0:[a-z]+]] +; CHECK: movl %e[[T0]], (%r[[A0]]) +; CHECK: movl $1, %eax +; CHECK: cmovael %e[[T0]], %eax +; CHECK: ret +} + +define i32 @_rdrand64_step(i64* %random_val) { + %call = call {i64, i32} @llvm.x86.rdrand.64() + %randval = extractvalue {i64, i32} %call, 0 + store i64 %randval, i64* %random_val + %isvalid = extractvalue {i64, i32} %call, 1 + ret i32 %isvalid +; CHECK: _rdrand64_step: +; CHECK: rdrandq %r[[T1:[[a-z]+]] +; CHECK: movq %r[[T1]], (%r[[A0]]) +; CHECK: movl $1, %eax +; CHECK: cmovael %e[[T1]], %eax +; CHECK: ret 
+} + +; Check that MachineCSE doesn't eliminate duplicate rdrand instructions. +define i32 @CSE() nounwind { + %rand1 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind + %v1 = extractvalue { i32, i32 } %rand1, 0 + %rand2 = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind + %v2 = extractvalue { i32, i32 } %rand2, 0 + %add = add i32 %v2, %v1 + ret i32 %add +; CHECK: CSE: +; CHECK: rdrandl +; CHECK: rdrandl +} + +; Check that MachineLICM doesn't hoist rdrand instructions. +define void @loop(i32* %p, i32 %n) nounwind { +entry: + %tobool1 = icmp eq i32 %n, 0 + br i1 %tobool1, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %p.addr.03 = phi i32* [ %incdec.ptr, %while.body ], [ %p, %entry ] + %n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ] + %dec = add nsw i32 %n.addr.02, -1 + %incdec.ptr = getelementptr inbounds i32* %p.addr.03, i64 1 + %rand = tail call { i32, i32 } @llvm.x86.rdrand.32() nounwind + %v1 = extractvalue { i32, i32 } %rand, 0 + store i32 %v1, i32* %p.addr.03, align 4 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + ret void +; CHECK: loop: +; CHECK-NOT: rdrandl +; CHECK: This Inner Loop Header: Depth=1 +; CHECK: rdrandl +} diff --git a/test/CodeGen/X86/remat-fold-load.ll b/test/CodeGen/X86/remat-fold-load.ll new file mode 100644 index 0000000000..de77ad3756 --- /dev/null +++ b/test/CodeGen/X86/remat-fold-load.ll @@ -0,0 +1,143 @@ +; RUN: llc < %s -disable-fp-elim -verify-coalescing +; PR13414 +; +; During coalescing, remat triggers DCE which deletes the penultimate use of a +; load. This load should not be folded into the remaining use because it is not +; safe to move, and it would extend the live range of the address. +; +; LiveRangeEdit::foldAsLoad() doesn't extend live ranges, so -verify-coalescing +; catches the problem. 
+ +target triple = "i386-unknown-linux-gnu" + +%type_a = type { %type_a*, %type_b } +%type_b = type { %type_c, i32 } +%type_c = type { i32, %type_d } +%type_d = type { i64 } +%type_e = type { %type_c, i64 } + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind + +define linkonce_odr void @test() nounwind { +entry: + br i1 undef, label %while.end.while.end26_crit_edge, label %while.body12.lr.ph + +while.end.while.end26_crit_edge: ; preds = %entry + br label %while.end26 + +while.body12.lr.ph: ; preds = %entry + br label %while.body12 + +while.body12: ; preds = %if.end24, %while.body12.lr.ph + %tmp = phi %type_a* [ undef, %while.body12.lr.ph ], [ %tmp18, %if.end24 ] + %ins151154161 = phi i128 [ 0, %while.body12.lr.ph ], [ %phitmp, %if.end24 ] + %ins135156160 = phi i128 [ 0, %while.body12.lr.ph ], [ %phitmp158, %if.end24 ] + %ins151 = or i128 0, %ins151154161 + %cmp.i.i.i.i.i67 = icmp sgt i32 undef, 8 + br i1 %cmp.i.i.i.i.i67, label %if.then.i.i.i.i71, label %if.else.i.i.i.i74 + +if.then.i.i.i.i71: ; preds = %while.body12 + %call4.i.i.i.i68 = call noalias i8* @malloc(i32 undef) nounwind + %tmp1 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1 + %buf_6.i.i.i.i70 = bitcast %type_d* %tmp1 to i8** + %tmp2 = load i8** %buf_6.i.i.i.i70, align 4 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* %tmp2, i32 undef, i32 1, i1 false) nounwind + unreachable + +if.else.i.i.i.i74: ; preds = %while.body12 + %i_.i.i.i.i72 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 0, i32 1, i32 0 + %tmp3 = load i64* %i_.i.i.i.i72, align 4 + %tmp4 = zext i64 %tmp3 to i128 + %tmp5 = shl nuw nsw i128 %tmp4, 32 + %ins148 = or i128 %tmp5, %ins151 + %second3.i.i76 = getelementptr inbounds %type_a* %tmp, i32 0, i32 1, i32 1 + %tmp6 = load i32* %second3.i.i76, align 4 + %tmp7 = zext i32 %tmp6 to i128 + %tmp8 = shl nuw i128 %tmp7, 96 + %mask144 = and i128 %ins148, 79228162495817593519834398720 + %tmp9 = load %type_e** undef, align 4 
+ %len_.i.i.i.i86 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 0 + %tmp10 = load i32* %len_.i.i.i.i86, align 4 + %tmp11 = zext i32 %tmp10 to i128 + %ins135 = or i128 %tmp11, %ins135156160 + %cmp.i.i.i.i.i88 = icmp sgt i32 %tmp10, 8 + br i1 %cmp.i.i.i.i.i88, label %if.then.i.i.i.i92, label %if.else.i.i.i.i95 + +if.then.i.i.i.i92: ; preds = %if.else.i.i.i.i74 + %call4.i.i.i.i89 = call noalias i8* @malloc(i32 %tmp10) nounwind + %ins126 = or i128 0, %ins135 + %tmp12 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1 + %buf_6.i.i.i.i91 = bitcast %type_d* %tmp12 to i8** + %tmp13 = load i8** %buf_6.i.i.i.i91, align 4 + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %call4.i.i.i.i89, i8* %tmp13, i32 %tmp10, i32 1, i1 false) nounwind + br label %A + +if.else.i.i.i.i95: ; preds = %if.else.i.i.i.i74 + %i_.i.i.i.i93 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 0, i32 1, i32 0 + br label %A + +A: ; preds = %if.else.i.i.i.i95, %if.then.i.i.i.i92 + %ins135157 = phi i128 [ %ins126, %if.then.i.i.i.i92 ], [ undef, %if.else.i.i.i.i95 ] + %second3.i.i97 = getelementptr inbounds %type_e* %tmp9, i32 0, i32 1 + %tmp14 = load i64* %second3.i.i97, align 4 + %tmp15 = trunc i64 %tmp14 to i32 + %cmp.i99 = icmp sgt i32 %tmp6, %tmp15 + %tmp16 = trunc i128 %ins135157 to i32 + %cmp.i.i.i.i.i.i101 = icmp sgt i32 %tmp16, 8 + br i1 %cmp.i.i.i.i.i.i101, label %if.then.i.i.i.i.i103, label %B + +if.then.i.i.i.i.i103: ; preds = %A + unreachable + +B: ; preds = %A + %tmp17 = trunc i128 %ins148 to i32 + %cmp.i.i.i.i.i.i83 = icmp sgt i32 %tmp17, 8 + br i1 %cmp.i.i.i.i.i.i83, label %if.then.i.i.i.i.i85, label %C + +if.then.i.i.i.i.i85: ; preds = %B + unreachable + +C: ; preds = %B + br i1 %cmp.i99, label %if.then17, label %if.end24 + +if.then17: ; preds = %C + br i1 false, label %if.then.i.i.i.i.i43, label %D + +if.then.i.i.i.i.i43: ; preds = %if.then17 + unreachable + +D: ; preds = %if.then17 + br i1 undef, label %if.then.i.i.i.i.i, label %E + +if.then.i.i.i.i.i: ; preds = %D + 
unreachable + +E: ; preds = %D + br label %if.end24 + +if.end24: ; preds = %E, %C + %phitmp = or i128 %tmp8, %mask144 + %phitmp158 = or i128 undef, undef + %tmp18 = load %type_a** undef, align 4 + %tmp19 = load %type_a** undef, align 4 + %cmp.i49 = icmp eq %type_a* %tmp18, %tmp19 + br i1 %cmp.i49, label %while.cond10.while.end26_crit_edge, label %while.body12 + +while.cond10.while.end26_crit_edge: ; preds = %if.end24 + %.pre = load %type_e** undef, align 4 + br label %while.end26 + +while.end26: ; preds = %while.cond10.while.end26_crit_edge, %while.end.while.end26_crit_edge + br i1 undef, label %while.body.lr.ph.i, label %F + +while.body.lr.ph.i: ; preds = %while.end26 + br label %while.body.i + +while.body.i: ; preds = %while.body.i, %while.body.lr.ph.i + br i1 false, label %while.body.i, label %F + +F: ; preds = %while.body.i, %while.end26 + ret void +} + +declare noalias i8* @malloc(i32) nounwind diff --git a/test/CodeGen/X86/reverse_branches.ll b/test/CodeGen/X86/reverse_branches.ll new file mode 100644 index 0000000000..9772125037 --- /dev/null +++ b/test/CodeGen/X86/reverse_branches.ll @@ -0,0 +1,104 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +@.str2 = private unnamed_addr constant [7 x i8] c"memchr\00", align 1 +@.str3 = private unnamed_addr constant [11 x i8] c"bsd_memchr\00", align 1 +@str4 = private unnamed_addr constant [5 x i8] c"Bug!\00" + +; Make sure at end of do.cond.i, we jump to do.body.i first to have a tighter +; inner loop. 
+define i32 @test_branches_order() uwtable ssp { +; CHECK: test_branches_order: +; CHECK: [[L0:LBB0_[0-9]+]]: ## %do.body.i +; CHECK: je +; CHECK: %do.cond.i +; CHECK: jne [[L0]] +; CHECK: jmp +; CHECK: %exit +entry: + %strs = alloca [1000 x [1001 x i8]], align 16 + br label %for.cond + +for.cond: + %j.0 = phi i32 [ 0, %entry ], [ %inc10, %for.inc9 ] + %cmp = icmp slt i32 %j.0, 1000 + br i1 %cmp, label %for.cond1, label %for.end11 + +for.cond1: + %indvars.iv50 = phi i64 [ %indvars.iv.next51, %for.body3 ], [ 0, %for.cond ] + %0 = trunc i64 %indvars.iv50 to i32 + %cmp2 = icmp slt i32 %0, 1000 + br i1 %cmp2, label %for.body3, label %for.inc9 + +for.body3: + %arraydecay = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 0 + %call = call i8* @memchr(i8* %arraydecay, i32 120, i64 1000) + %add.ptr = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv50, i64 %indvars.iv50 + %cmp7 = icmp eq i8* %call, %add.ptr + %indvars.iv.next51 = add i64 %indvars.iv50, 1 + br i1 %cmp7, label %for.cond1, label %if.then + +if.then: + %puts = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0)) + call void @exit(i32 1) noreturn + unreachable + +for.inc9: + %inc10 = add nsw i32 %j.0, 1 + br label %for.cond + +for.end11: + %puts42 = call i32 @puts(i8* getelementptr inbounds ([7 x i8]* @.str2, i64 0, i64 0)) + br label %for.cond14 + +for.cond14: + %j13.0 = phi i32 [ 0, %for.end11 ], [ %inc39, %for.inc38 ] + %cmp15 = icmp slt i32 %j13.0, 1000 + br i1 %cmp15, label %for.cond18, label %for.end40 + +for.cond18: + %indvars.iv = phi i64 [ %indvars.iv.next, %exit ], [ 0, %for.cond14 ] + %1 = trunc i64 %indvars.iv to i32 + %cmp19 = icmp slt i32 %1, 1000 + br i1 %cmp19, label %for.body20, label %for.inc38 + +for.body20: + %arraydecay24 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 0 + br label %do.body.i + +do.body.i: + %n.addr.0.i = phi i64 [ %dec.i, %do.cond.i ], [ 1000, %for.body20 ] + 
%p.0.i = phi i8* [ %incdec.ptr.i, %do.cond.i ], [ %arraydecay24, %for.body20 ] + %2 = load i8* %p.0.i, align 1 + %cmp3.i = icmp eq i8 %2, 120 + br i1 %cmp3.i, label %exit, label %do.cond.i + +do.cond.i: + %incdec.ptr.i = getelementptr inbounds i8* %p.0.i, i64 1 + %dec.i = add i64 %n.addr.0.i, -1 + %cmp5.i = icmp eq i64 %dec.i, 0 + br i1 %cmp5.i, label %if.then32, label %do.body.i + +exit: + %add.ptr30 = getelementptr inbounds [1000 x [1001 x i8]]* %strs, i64 0, i64 %indvars.iv, i64 %indvars.iv + %cmp31 = icmp eq i8* %p.0.i, %add.ptr30 + %indvars.iv.next = add i64 %indvars.iv, 1 + br i1 %cmp31, label %for.cond18, label %if.then32 + +if.then32: + %puts43 = call i32 @puts(i8* getelementptr inbounds ([5 x i8]* @str4, i64 0, i64 0)) + call void @exit(i32 1) noreturn + unreachable + +for.inc38: + %inc39 = add nsw i32 %j13.0, 1 + br label %for.cond14 + +for.end40: + %puts44 = call i32 @puts(i8* getelementptr inbounds ([11 x i8]* @.str3, i64 0, i64 0)) + ret i32 0 +} + +declare i8* @memchr(i8*, i32, i64) nounwind readonly +declare void @exit(i32) noreturn +declare i32 @puts(i8* nocapture) nounwind + diff --git a/test/CodeGen/X86/rounding-ops.ll b/test/CodeGen/X86/rounding-ops.ll index 0dd74ea079..51fcf64184 100644 --- a/test/CodeGen/X86/rounding-ops.ll +++ b/test/CodeGen/X86/rounding-ops.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s -; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse41 | FileCheck -check-prefix=CHECK-SSE %s +; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx | FileCheck -check-prefix=CHECK-AVX %s define float @test1(float %x) nounwind { %call = tail call float @floorf(float %x) nounwind readnone diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll index c8d9345c40..2e39473057 100644 --- a/test/CodeGen/X86/select.ll +++ b/test/CodeGen/X86/select.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s 
-mtriple=x86_64-apple-darwin10 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=generic | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=atom | FileCheck -check-prefix=ATOM %s ; PR5757 %0 = type { i64, i32 } @@ -12,6 +13,10 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind { ; CHECK: test1: ; CHECK: cmovneq %rdi, %rsi ; CHECK: movl (%rsi), %eax + +; ATOM: test1: +; ATOM: cmovneq %rdi, %rsi +; ATOM: movl (%rsi), %eax } @@ -31,6 +36,10 @@ bb91: ; preds = %bb84 ; CHECK: test2: ; CHECK: movnew ; CHECK: movswl + +; ATOM: test2: +; ATOM: movnew +; ATOM: movswl } declare i1 @return_false() @@ -44,6 +53,9 @@ entry: ret float %iftmp.0.0 ; CHECK: test3: ; CHECK: movss {{.*}},4), %xmm0 + +; ATOM: test3: +; ATOM: movss {{.*}},4), %xmm0 } define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly { @@ -55,6 +67,9 @@ entry: ret i8 %2 ; CHECK: test4: ; CHECK: movsbl ({{.*}},4), %eax + +; ATOM: test4: +; ATOM: movsbl ({{.*}},4), %eax } define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind { @@ -62,6 +77,8 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind { store <2 x i16> %x, <2 x i16>* %p ret void ; CHECK: test5: + +; ATOM: test5: } define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { @@ -79,6 +96,12 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind { ; CHECK: ret ; CHECK: mulps ; CHECK: ret + +; ATOM: test6: +; ATOM: je +; ATOM: ret +; ATOM: mulps +; ATOM: ret } ; Select with fp80's @@ -89,6 +112,10 @@ define x86_fp80 @test7(i32 %tmp8) nounwind { ; CHECK: test7: ; CHECK: leaq ; CHECK: fldt (%r{{.}}x,%r{{.}}x) + +; ATOM: test7: +; ATOM: leaq +; ATOM: fldt (%r{{.}}x,%r{{.}}x) } ; widening select v6i32 and then a sub @@ -99,6 +126,8 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2) ret void ; CHECK: test8: + +; ATOM: test8: } @@ -113,6 +142,12 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone 
ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test9: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } ;; Same as test9 @@ -125,6 +160,12 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test9a: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { @@ -137,6 +178,12 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test9b: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } ;; Select between -1 and 1. @@ -149,6 +196,12 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: sbbq %rax, %rax ; CHECK: orq $1, %rax ; CHECK: ret + +; ATOM: test10: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: orq $1, %rax +; ATOM: ret } @@ -163,6 +216,13 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: notq %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test11: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: notq %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { @@ -175,6 +235,13 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone { ; CHECK: notq %rax ; CHECK: orq %rsi, %rax ; CHECK: ret + +; ATOM: test11a: +; ATOM: cmpq $1, %rdi +; ATOM: sbbq %rax, %rax +; ATOM: notq %rax +; ATOM: orq %rsi, %rax +; ATOM: ret } @@ -189,10 +256,16 @@ entry: %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone ret i8* %call ; CHECK: test12: -; CHECK: mulq ; CHECK: movq $-1, %rdi +; CHECK: mulq ; CHECK: cmovnoq %rax, %rdi ; CHECK: jmp __Znam + +; ATOM: test12: +; ATOM: mulq +; ATOM: movq $-1, %rdi +; ATOM: cmovnoq %rax, %rdi +; 
ATOM: jmp __Znam } declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone @@ -205,6 +278,11 @@ define i32 @test13(i32 %a, i32 %b) nounwind { ; CHECK: cmpl ; CHECK-NEXT: sbbl ; CHECK-NEXT: ret + +; ATOM: test13: +; ATOM: cmpl +; ATOM-NEXT: sbbl +; ATOM-NEXT: ret } define i32 @test14(i32 %a, i32 %b) nounwind { @@ -216,6 +294,12 @@ define i32 @test14(i32 %a, i32 %b) nounwind { ; CHECK-NEXT: sbbl ; CHECK-NEXT: notl ; CHECK-NEXT: ret + +; ATOM: test14: +; ATOM: cmpl +; ATOM-NEXT: sbbl +; ATOM-NEXT: notl +; ATOM-NEXT: ret } ; rdar://10961709 @@ -227,6 +311,10 @@ entry: ; CHECK: test15: ; CHECK: negl ; CHECK: sbbl + +; ATOM: test15: +; ATOM: negl +; ATOM: sbbl } define i64 @test16(i64 %x) nounwind uwtable readnone ssp { @@ -237,6 +325,10 @@ entry: ; CHECK: test16: ; CHECK: negq ; CHECK: sbbq + +; ATOM: test16: +; ATOM: negq +; ATOM: sbbq } define i16 @test17(i16 %x) nounwind { @@ -247,4 +339,8 @@ entry: ; CHECK: test17: ; CHECK: negw ; CHECK: sbbw + +; ATOM: test17: +; ATOM: negw +; ATOM: sbbw } diff --git a/test/CodeGen/X86/sext-setcc-self.ll b/test/CodeGen/X86/sext-setcc-self.ll new file mode 100644 index 0000000000..23d66a2472 --- /dev/null +++ b/test/CodeGen/X86/sext-setcc-self.ll @@ -0,0 +1,55 @@ +; RUN: llc -march=x86-64 -mcpu=nehalem -asm-verbose=false < %s | FileCheck %s + +define <4 x i32> @test_ueq(<4 x float> %in) { +entry: + ; CHECK: pcmpeqd %xmm0, %xmm0 + ; CHECK-NEXT: ret + %0 = fcmp ueq <4 x float> %in, %in + %1 = sext <4 x i1> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @test_uge(<4 x float> %in) { +entry: + ; CHECK: pcmpeqd %xmm0, %xmm0 + ; CHECK-NEXT: ret + %0 = fcmp uge <4 x float> %in, %in + %1 = sext <4 x i1> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @test_ule(<4 x float> %in) { +entry: + ; CHECK: pcmpeqd %xmm0, %xmm0 + ; CHECK-NEXT: ret + %0 = fcmp ule <4 x float> %in, %in + %1 = sext <4 x i1> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @test_one(<4 x float> %in) { +entry: + ; CHECK: 
xorps %xmm0, %xmm0 + ; CHECK-NEXT: ret + %0 = fcmp one <4 x float> %in, %in + %1 = sext <4 x i1> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @test_ogt(<4 x float> %in) { +entry: + ; CHECK: xorps %xmm0, %xmm0 + ; CHECK-NEXT: ret + %0 = fcmp ogt <4 x float> %in, %in + %1 = sext <4 x i1> %0 to <4 x i32> + ret <4 x i32> %1 +} + +define <4 x i32> @test_olt(<4 x float> %in) { +entry: + ; CHECK: xorps %xmm0, %xmm0 + ; CHECK-NEXT: ret + %0 = fcmp olt <4 x float> %in, %in + %1 = sext <4 x i1> %0 to <4 x i32> + ret <4 x i32> %1 +} diff --git a/test/CodeGen/X86/shift-and.ll b/test/CodeGen/X86/shift-and.ll index b747cc5580..1de915164f 100644 --- a/test/CodeGen/X86/shift-and.ll +++ b/test/CodeGen/X86/shift-and.ll @@ -1,13 +1,27 @@ -; RUN: llc < %s -march=x86 | grep and | count 2 -; RUN: llc < %s -march=x86-64 | not grep and +; RUN: llc < %s -mtriple=i386-apple-macosx | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s --check-prefix=X64 define i32 @t1(i32 %t, i32 %val) nounwind { +; X32: t1: +; X32-NOT: andl +; X32: shll + +; X64: t1: +; X64-NOT: andl +; X64: shll %shamt = and i32 %t, 31 %res = shl i32 %val, %shamt ret i32 %res } define i32 @t2(i32 %t, i32 %val) nounwind { +; X32: t2: +; X32-NOT: andl +; X32: shll + +; X64: t2: +; X64-NOT: andl +; X64: shll %shamt = and i32 %t, 63 %res = shl i32 %val, %shamt ret i32 %res @@ -16,6 +30,13 @@ define i32 @t2(i32 %t, i32 %val) nounwind { @X = internal global i16 0 define void @t3(i16 %t) nounwind { +; X32: t3: +; X32-NOT: andl +; X32: sarw + +; X64: t3: +; X64-NOT: andl +; X64: sarw %shamt = and i16 %t, 31 %tmp = load i16* @X %tmp1 = ashr i16 %tmp, %shamt @@ -24,13 +45,34 @@ define void @t3(i16 %t) nounwind { } define i64 @t4(i64 %t, i64 %val) nounwind { +; X64: t4: +; X64-NOT: and +; X64: shrq %shamt = and i64 %t, 63 %res = lshr i64 %val, %shamt ret i64 %res } define i64 @t5(i64 %t, i64 %val) nounwind { +; X64: t5: +; X64-NOT: and +; X64: shrq %shamt = and i64 %t, 191 %res = 
lshr i64 %val, %shamt ret i64 %res } + + +; rdar://11866926 +define i64 @t6(i64 %key, i64* nocapture %val) nounwind { +entry: +; X64: t6: +; X64-NOT: movabsq +; X64: decq +; X64: andq + %shr = lshr i64 %key, 3 + %0 = load i64* %val, align 8 + %sub = add i64 %0, 2305843009213693951 + %and = and i64 %sub, %shr + ret i64 %and +} diff --git a/test/CodeGen/X86/sincos.ll b/test/CodeGen/X86/sincos.ll index 13f932982f..1479be1f56 100644 --- a/test/CodeGen/X86/sincos.ll +++ b/test/CodeGen/X86/sincos.ll @@ -1,8 +1,6 @@ ; Make sure this testcase codegens to the sin and cos instructions, not calls -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \ -; RUN: grep sin\$ | count 3 -; RUN: llc < %s -march=x86 -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | \ -; RUN: grep cos\$ | count 3 +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=SIN +; RUN: llc < %s -mtriple=i686-apple-macosx -mattr=-sse,-sse2,-sse3 -enable-unsafe-fp-math | FileCheck %s --check-prefix=COS declare float @sinf(float) readonly @@ -10,39 +8,59 @@ declare double @sin(double) readonly declare x86_fp80 @sinl(x86_fp80) readonly +; SIN: test1: define float @test1(float %X) { %Y = call float @sinf(float %X) readonly ret float %Y } +; SIN: {{^[ \t]*fsin$}} +; SIN-NOT: fsin + +; SIN: test2: define double @test2(double %X) { %Y = call double @sin(double %X) readonly ret double %Y } +; SIN: {{^[ \t]*fsin$}} + +; SIN-NOT: fsin +; SIN: test3: define x86_fp80 @test3(x86_fp80 %X) { %Y = call x86_fp80 @sinl(x86_fp80 %X) readonly ret x86_fp80 %Y } +; SIN: {{^[ \t]*fsin$}} +; SIN-NOT: fsin +; COS-NOT: fcos declare float @cosf(float) readonly declare double @cos(double) readonly declare x86_fp80 @cosl(x86_fp80) readonly + +; SIN: test4: +; COS: test3: define float @test4(float %X) { %Y = call float @cosf(float %X) readonly ret float %Y } +; COS: {{^[ \t]*fcos}} define double @test5(double %X) { %Y = call double @cos(double %X) 
readonly ret double %Y } +; COS: {{^[ \t]*fcos}} define x86_fp80 @test6(x86_fp80 %X) { %Y = call x86_fp80 @cosl(x86_fp80 %X) readonly ret x86_fp80 %Y } +; COS: {{^[ \t]*fcos}} +; SIN-NOT: fsin +; COS-NOT: fcos diff --git a/test/CodeGen/X86/sink-out-of-loop.ll b/test/CodeGen/X86/sink-out-of-loop.ll new file mode 100644 index 0000000000..c600f925a3 --- /dev/null +++ b/test/CodeGen/X86/sink-out-of-loop.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s + +; A MOV32ri is inside a loop, it has two successors, one successor is inside the +; same loop, the other successor is outside the loop. We should be able to sink +; MOV32ri outside the loop. +; rdar://11980766 +define i32 @sink_succ(i32 %argc, i8** nocapture %argv) nounwind uwtable ssp { +; CHECK: sink_succ +; CHECK: [[OUTER_LN1:LBB0_[0-9]+]]: ## %preheader +; CHECK: %exit +; CHECK-NOT: movl +; CHECK: jne [[OUTER_LN1]] +; CHECK: movl +; CHECK: [[LN2:LBB0_[0-9]+]]: ## %for.body2 +; CHECK: jne [[LN2]] +; CHECK: ret +entry: + br label %preheader + +preheader: + %i.127 = phi i32 [ 0, %entry ], [ %inc9, %exit ] + br label %for.body1.lr + +for.body1.lr: + %iv30 = phi i32 [ 1, %preheader ], [ %iv.next31, %for.inc40.i ] + br label %for.body1 + +for.body1: + %iv.i = phi i64 [ 0, %for.body1.lr ], [ %iv.next.i, %for.body1 ] + %iv.next.i = add i64 %iv.i, 1 + %lftr.wideiv32 = trunc i64 %iv.next.i to i32 + %exitcond33 = icmp eq i32 %lftr.wideiv32, %iv30 + br i1 %exitcond33, label %for.inc40.i, label %for.body1 + +for.inc40.i: + %iv.next31 = add i32 %iv30, 1 + %exitcond49.i = icmp eq i32 %iv.next31, 32 + br i1 %exitcond49.i, label %exit, label %for.body1.lr + +exit: + %inc9 = add nsw i32 %i.127, 1 + %exitcond34 = icmp eq i32 %inc9, 10 + br i1 %exitcond34, label %for.body2, label %preheader + +for.body2: + %iv = phi i64 [ %iv.next, %for.body2 ], [ 0, %exit ] + %iv.next = add i64 %iv, 1 + %lftr.wideiv = trunc i64 %iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 2048 + br i1 %exitcond, label 
%for.end20, label %for.body2 + +for.end20: + ret i32 0 +} diff --git a/test/CodeGen/X86/sse-minmax.ll b/test/CodeGen/X86/sse-minmax.ll index 4405f68451..0ba02155a6 100644 --- a/test/CodeGen/X86/sse-minmax.ll +++ b/test/CodeGen/X86/sse-minmax.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false | FileCheck %s -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s -; RUN: llc < %s -march=x86-64 -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false | FileCheck %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=UNSAFE %s +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-apple-darwin -mcpu=nehalem -asm-verbose=false -enable-no-nans-fp-math | FileCheck -check-prefix=FINITE %s ; Some of these patterns can be matched as SSE min or max. Some of ; then can be matched provided that the operands are swapped. 
@@ -47,8 +47,7 @@ define double @olt(double %x, double %y) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ogt_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -65,8 +64,7 @@ define double @ogt_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: olt_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -107,8 +105,7 @@ define double @ole(double %x, double %y) nounwind { ; CHECK: oge_inverse: ; CHECK-NEXT: ucomisd %xmm1, %xmm0 ; UNSAFE: oge_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -123,8 +120,7 @@ define double @oge_inverse(double %x, double %y) nounwind { ; CHECK: ole_inverse: ; CHECK-NEXT: ucomisd %xmm0, %xmm1 ; UNSAFE: ole_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -142,7 +138,8 @@ define double @ole_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: ret ; UNSAFE: ogt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -160,7 +157,8 @@ define double @ogt_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE: olt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; 
FINITE: olt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -218,7 +216,8 @@ define double @olt_inverse_x(double %x) nounwind { ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: oge_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -234,7 +233,8 @@ define double @oge_x(double %x) nounwind { ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ole_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -313,8 +313,7 @@ define double @ult(double %x, double %y) nounwind { ; CHECK: ugt_inverse: ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ugt_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -329,8 +328,7 @@ define double @ugt_inverse(double %x, double %y) nounwind { ; CHECK: ult_inverse: ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: ult_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -378,8 +376,7 @@ define double @ule(double %x, double %y) nounwind { ; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: uge_inverse: -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_inverse: ; FINITE-NEXT: minsd %xmm0, %xmm1 @@ -395,8 +392,7 @@ define double @uge_inverse(double %x, double %y) nounwind { ; CHECK-NEXT: maxsd %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule_inverse: -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; 
UNSAFE-NEXT: maxsd %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_inverse: ; FINITE-NEXT: maxsd %xmm0, %xmm1 @@ -412,7 +408,8 @@ define double @ule_inverse(double %x, double %y) nounwind { ; CHECK: ucomisd %xmm0, %xmm1 ; UNSAFE: ugt_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -428,7 +425,8 @@ define double @ugt_x(double %x) nounwind { ; CHECK: ucomisd %xmm1, %xmm0 ; UNSAFE: ult_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -483,7 +481,8 @@ define double @ult_inverse_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE: uge_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: maxsd %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -502,7 +501,8 @@ define double @uge_x(double %x) nounwind { ; CHECK-NEXT: ret ; UNSAFE: ule_x: ; UNSAFE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 -; UNSAFE-NEXT: minsd %xmm1, %xmm0 +; UNSAFE-NEXT: minsd %xmm0, %xmm1 +; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_x: ; FINITE-NEXT: xorp{{[sd]}} %xmm1, %xmm1 @@ -590,9 +590,7 @@ define double @olt_y(double %x) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ogt_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ogt_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -611,9 +609,7 @@ define double @ogt_inverse_y(double %x) nounwind { ; CHECK-NEXT: movap{{[sd]}} %xmm1, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: 
olt_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: olt_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -657,9 +653,7 @@ define double @ole_y(double %x) nounwind { ; CHECK: oge_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: oge_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: oge_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -675,9 +669,7 @@ define double @oge_inverse_y(double %x) nounwind { ; CHECK: ole_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: ole_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ole_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -721,9 +713,7 @@ define double @ult_y(double %x) nounwind { ; CHECK: ugt_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: ugt_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ugt_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -739,9 +729,7 @@ define double @ugt_inverse_y(double %x) nounwind { ; CHECK: ult_inverse_y: ; CHECK: ucomisd %xmm ; UNSAFE: ult_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ult_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -792,9 +780,7 @@ define double @ule_y(double %x) nounwind { ; CHECK-NEXT: minsd {{[^,]*}}, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: uge_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: minsd %xmm0, %xmm1 -; UNSAFE-NEXT: 
movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: minsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: uge_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 @@ -811,9 +797,7 @@ define double @uge_inverse_y(double %x) nounwind { ; CHECK-NEXT: maxsd {{[^,]*}}, %xmm0 ; CHECK-NEXT: ret ; UNSAFE: ule_inverse_y: -; UNSAFE-NEXT: movsd {{[^,]*}}, %xmm1 -; UNSAFE-NEXT: maxsd %xmm0, %xmm1 -; UNSAFE-NEXT: movap{{[sd]}} %xmm1, %xmm0 +; UNSAFE-NEXT: maxsd {{[^,]*}}, %xmm0 ; UNSAFE-NEXT: ret ; FINITE: ule_inverse_y: ; FINITE-NEXT: movsd {{[^,]*}}, %xmm1 diff --git a/test/CodeGen/X86/stack-align.ll b/test/CodeGen/X86/stack-align.ll index f6c13ec0ad..0ddb2378ef 100644 --- a/test/CodeGen/X86/stack-align.ll +++ b/test/CodeGen/X86/stack-align.ll @@ -10,11 +10,11 @@ target triple = "i686-apple-darwin8" define void @test({ double, double }* byval %z, double* %P) nounwind { entry: %tmp3 = load double* @G, align 16 ; <double> [#uses=1] - %tmp4 = tail call double @fabs( double %tmp3 ) ; <double> [#uses=1] + %tmp4 = tail call double @fabs( double %tmp3 ) readnone ; <double> [#uses=1] store volatile double %tmp4, double* %P %tmp = getelementptr { double, double }* %z, i32 0, i32 0 ; <double*> [#uses=1] %tmp1 = load volatile double* %tmp, align 8 ; <double> [#uses=1] - %tmp2 = tail call double @fabs( double %tmp1 ) ; <double> [#uses=1] + %tmp2 = tail call double @fabs( double %tmp1 ) readnone ; <double> [#uses=1] ; CHECK: andpd{{.*}}4(%esp), %xmm %tmp6 = fadd double %tmp4, %tmp2 ; <double> [#uses=1] store volatile double %tmp6, double* %P, align 8 diff --git a/test/CodeGen/X86/stack-protector-linux.ll b/test/CodeGen/X86/stack-protector.ll index c07511443b..c07511443b 100644 --- a/test/CodeGen/X86/stack-protector-linux.ll +++ b/test/CodeGen/X86/stack-protector.ll diff --git a/test/CodeGen/X86/store_op_load_fold2.ll b/test/CodeGen/X86/store_op_load_fold2.ll index 8313166a90..6e4fe90053 100644 --- a/test/CodeGen/X86/store_op_load_fold2.ll +++ b/test/CodeGen/X86/store_op_load_fold2.ll @@ -1,5 +1,5 @@ -; 
RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT -; RUN: llc < %s -mtriple=i686-linux -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL +; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 -x86-asm-syntax=att | FileCheck %s -check-prefix=ATT +; RUN: llc < %s -mtriple=i686-linux -mcpu=corei7 -x86-asm-syntax=intel | FileCheck %s -check-prefix=INTEL target datalayout = "e-p:32:32" %struct.Macroblock = type { i32, i32, i32, i32, i32, [8 x i32], %struct.Macroblock*, %struct.Macroblock*, i32, [2 x [4 x [4 x [2 x i32]]]], [16 x i8], [16 x i8], i32, i64, [4 x i32], [4 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, double, i32, i32, i32, i32, i32, i32, i32, i32, i32 } diff --git a/test/CodeGen/X86/tailcall-cgp-dup.ll b/test/CodeGen/X86/tailcall-cgp-dup.ll new file mode 100644 index 0000000000..a80b90f9ee --- /dev/null +++ b/test/CodeGen/X86/tailcall-cgp-dup.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s + +; Teach CGP to dup returns to enable tail call optimization. 
+; rdar://9147433 + +define i32 @foo(i32 %x) nounwind ssp { +; CHECK: foo: +entry: + switch i32 %x, label %return [ + i32 1, label %sw.bb + i32 2, label %sw.bb1 + i32 3, label %sw.bb3 + i32 4, label %sw.bb5 + i32 5, label %sw.bb7 + i32 6, label %sw.bb9 + ] + +sw.bb: ; preds = %entry +; CHECK: jmp _f1 + %call = tail call i32 @f1() nounwind + br label %return + +sw.bb1: ; preds = %entry +; CHECK: jmp _f2 + %call2 = tail call i32 @f2() nounwind + br label %return + +sw.bb3: ; preds = %entry +; CHECK: jmp _f3 + %call4 = tail call i32 @f3() nounwind + br label %return + +sw.bb5: ; preds = %entry +; CHECK: jmp _f4 + %call6 = tail call i32 @f4() nounwind + br label %return + +sw.bb7: ; preds = %entry +; CHECK: jmp _f5 + %call8 = tail call i32 @f5() nounwind + br label %return + +sw.bb9: ; preds = %entry +; CHECK: jmp _f6 + %call10 = tail call i32 @f6() nounwind + br label %return + +return: ; preds = %entry, %sw.bb9, %sw.bb7, %sw.bb5, %sw.bb3, %sw.bb1, %sw.bb + %retval.0 = phi i32 [ %call10, %sw.bb9 ], [ %call8, %sw.bb7 ], [ %call6, %sw.bb5 ], [ %call4, %sw.bb3 ], [ %call2, %sw.bb1 ], [ %call, %sw.bb ], [ 0, %entry ] + ret i32 %retval.0 +} + +declare i32 @f1() + +declare i32 @f2() + +declare i32 @f3() + +declare i32 @f4() + +declare i32 @f5() + +declare i32 @f6() + +; rdar://11958338 +%0 = type opaque + +declare i8* @bar(i8*) uwtable optsize noinline ssp + +define hidden %0* @thingWithValue(i8* %self) uwtable ssp { +entry: +; CHECK: thingWithValue: +; CHECK: jmp _bar + br i1 undef, label %if.then.i, label %if.else.i + +if.then.i: ; preds = %entry + br label %someThingWithValue.exit + +if.else.i: ; preds = %entry + %call4.i = tail call i8* @bar(i8* undef) optsize + br label %someThingWithValue.exit + +someThingWithValue.exit: ; preds = %if.else.i, %if.then.i + %retval.0.in.i = phi i8* [ undef, %if.then.i ], [ %call4.i, %if.else.i ] + %retval.0.i = bitcast i8* %retval.0.in.i to %0* + ret %0* %retval.0.i +} diff --git a/test/CodeGen/X86/tailcall-largecode.ll 
b/test/CodeGen/X86/tailcall-largecode.ll index c3f4278aec..e9b8721e66 100644 --- a/test/CodeGen/X86/tailcall-largecode.ll +++ b/test/CodeGen/X86/tailcall-largecode.ll @@ -49,6 +49,11 @@ define fastcc i32 @direct_manyargs() { ; CHECK: pushq ; Pass the stack argument. ; CHECK: movl $7, 16(%rsp) +; This is the large code model, so &manyargs_callee may not fit into +; the jmp instruction. Put it into a register which won't be clobbered +; while restoring callee-saved registers and won't be used for passing +; arguments. +; CHECK: movabsq $manyargs_callee, %rax ; Pass the register arguments, in the right registers. ; CHECK: movl $1, %edi ; CHECK: movl $2, %esi @@ -56,11 +61,6 @@ define fastcc i32 @direct_manyargs() { ; CHECK: movl $4, %ecx ; CHECK: movl $5, %r8d ; CHECK: movl $6, %r9d -; This is the large code model, so &manyargs_callee may not fit into -; the jmp instruction. Put it into R11, which won't be clobbered -; while restoring callee-saved registers and won't be used for passing -; arguments. -; CHECK: movabsq $manyargs_callee, %rax ; Adjust the stack to "return". ; CHECK: popq ; And tail-call to the target. 
diff --git a/test/CodeGen/X86/thiscall-struct-return.ll b/test/CodeGen/X86/thiscall-struct-return.ll index a7be48355f..0507cb890c 100644 --- a/test/CodeGen/X86/thiscall-struct-return.ll +++ b/test/CodeGen/X86/thiscall-struct-return.ll @@ -10,7 +10,7 @@ declare x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* noalias sret %agg.result define void @testv() nounwind { ; CHECK: testv: -; CHECK: leal +; CHECK: leal 16(%esp), %esi ; CHECK-NEXT: movl %esi, (%esp) ; CHECK-NEXT: calll _ZN1CC1Ev ; CHECK: leal 8(%esp), %eax @@ -29,7 +29,7 @@ entry: define void @test2v() nounwind { ; CHECK: test2v: -; CHECK: leal +; CHECK: leal 16(%esp), %esi ; CHECK-NEXT: movl %esi, (%esp) ; CHECK-NEXT: calll _ZN1CC1Ev ; CHECK: leal 8(%esp), %eax diff --git a/test/CodeGen/X86/tls-pie.ll b/test/CodeGen/X86/tls-pie.ll index 8269c43b9a..3fca9f5a37 100644 --- a/test/CodeGen/X86/tls-pie.ll +++ b/test/CodeGen/X86/tls-pie.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \ +; RUN: llc < %s -march=x86 -mcpu=generic -mtriple=i386-linux-gnu -relocation-model=pic -enable-pie \ ; RUN: | FileCheck -check-prefix=X32 %s -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \ +; RUN: llc < %s -march=x86-64 -mcpu=generic -mtriple=x86_64-linux-gnu -relocation-model=pic -enable-pie \ ; RUN: | FileCheck -check-prefix=X64 %s @i = thread_local global i32 15 diff --git a/test/CodeGen/X86/unreachable-stack-protector.ll b/test/CodeGen/X86/unreachable-stack-protector.ll deleted file mode 100644 index b066297ff1..0000000000 --- a/test/CodeGen/X86/unreachable-stack-protector.ll +++ /dev/null @@ -1,19 +0,0 @@ -; RUN: llc < %s -disable-cgp-delete-dead-blocks | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-darwin10.0.0" - -declare i64 @llvm.objectsize.i64(i8*, i1) nounwind 
readnone - -define void @test5() nounwind optsize noinline ssp { -entry: -; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) - %buf = alloca [64 x i8], align 16 - %0 = call i64 @llvm.objectsize.i64(i8* undef, i1 false) - br i1 false, label %if.end, label %if.then - -if.then: ; preds = %entry - unreachable - -if.end: ; preds = %entry - ret void -} diff --git a/test/CodeGen/X86/v-binop-widen2.ll b/test/CodeGen/X86/v-binop-widen2.ll index ae3f55a316..569586af49 100644 --- a/test/CodeGen/X86/v-binop-widen2.ll +++ b/test/CodeGen/X86/v-binop-widen2.ll @@ -1,9 +1,16 @@ -; RUN: llc -march=x86 -mattr=+sse < %s | FileCheck %s +; RUN: llc -march=x86 -mcpu=generic -mattr=+sse < %s | FileCheck %s +; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s %vec = type <6 x float> ; CHECK: divss ; CHECK: divss ; CHECK: divps + +; Scheduler causes a different instruction order to be produced on Intel Atom +; ATOM: divps +; ATOM: divss +; ATOM: divss + define %vec @vecdiv( %vec %p1, %vec %p2) { %result = fdiv %vec %p1, %p2 diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll index 91777f7aa6..46d6a23554 100644 --- a/test/CodeGen/X86/vec_compare-2.ll +++ b/test/CodeGen/X86/vec_compare-2.ll @@ -10,8 +10,7 @@ define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) { entry: ; CHECK: cfi_def_cfa_offset ; CHECK-NOT: set -; CHECK: movzwl -; CHECK: movzwl +; CHECK: punpcklwd ; CHECK: pshufd ; CHECK: pshufb %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1] diff --git a/test/CodeGen/X86/vec_compare.ll b/test/CodeGen/X86/vec_compare.ll index 39c9b770d5..367dd27f30 100644 --- a/test/CodeGen/X86/vec_compare.ll +++ b/test/CodeGen/X86/vec_compare.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -mcpu=yonah | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=yonah -mtriple=i386-apple-darwin | FileCheck %s define <4 x i32> @test1(<4 x i32> %A, <4 x i32> %B) nounwind { diff --git 
a/test/CodeGen/X86/vec_shuffle-38.ll b/test/CodeGen/X86/vec_shuffle-38.ll index 96ef883c4e..ec196df7ae 100644 --- a/test/CodeGen/X86/vec_shuffle-38.ll +++ b/test/CodeGen/X86/vec_shuffle-38.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s define <2 x double> @ld(<2 x double> %p) nounwind optsize ssp { ; CHECK: unpcklpd diff --git a/test/CodeGen/X86/vec_ss_load_fold.ll b/test/CodeGen/X86/vec_ss_load_fold.ll index 3bd3f7b60b..c294df575a 100644 --- a/test/CodeGen/X86/vec_ss_load_fold.ll +++ b/test/CodeGen/X86/vec_ss_load_fold.ll @@ -70,3 +70,17 @@ define <4 x float> @test4(<4 x float> %A, float *%b, i32 %C) nounwind { ; CHECK: call ; CHECK: roundss $4, %xmm{{.*}}, %xmm0 } + +; PR13576 +define <2 x double> @test5() nounwind uwtable readnone noinline { +entry: + %0 = tail call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> <double +4.569870e+02, double 1.233210e+02>, i32 128) nounwind readnone + ret <2 x double> %0 +; CHECK: test5: +; CHECK: mov +; CHECK: mov +; CHECK: cvtsi2sd +} + +declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone diff --git a/test/CodeGen/X86/vshift-1.ll b/test/CodeGen/X86/vshift-1.ll index 49551562c5..e775750bbe 100644 --- a/test/CodeGen/X86/vshift-1.ll +++ b/test/CodeGen/X86/vshift-1.ll @@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { entry: ; CHECK: shift1b: ; CHECK: movd -; CHECK-NEXT: psllq +; CHECK: psllq %0 = insertelement <2 x i64> undef, i64 %amt, i32 0 %1 = insertelement <2 x i64> %0, i64 %amt, i32 1 %shl = shl <2 x i64> %val, %1 @@ -38,7 +38,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: ; CHECK: shift2b: ; CHECK: movd -; CHECK-NEXT: pslld +; CHECK: pslld %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-2.ll 
b/test/CodeGen/X86/vshift-2.ll index 9a9b419abe..9496893bd1 100644 --- a/test/CodeGen/X86/vshift-2.ll +++ b/test/CodeGen/X86/vshift-2.ll @@ -16,7 +16,7 @@ define void @shift1b(<2 x i64> %val, <2 x i64>* %dst, i64 %amt) nounwind { entry: ; CHECK: shift1b: ; CHECK: movd -; CHECK-NEXT: psrlq +; CHECK: psrlq %0 = insertelement <2 x i64> undef, i64 %amt, i32 0 %1 = insertelement <2 x i64> %0, i64 %amt, i32 1 %lshr = lshr <2 x i64> %val, %1 @@ -37,7 +37,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: ; CHECK: shift2b: ; CHECK: movd -; CHECK-NEXT: psrld +; CHECK: psrld %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 @@ -63,7 +63,7 @@ entry: ; CHECK: shift3b: ; CHECK: movzwl ; CHECK: movd -; CHECK-NEXT: psrlw +; CHECK: psrlw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-3.ll b/test/CodeGen/X86/vshift-3.ll index 8e8a9aa04b..b2b48b9da9 100644 --- a/test/CodeGen/X86/vshift-3.ll +++ b/test/CodeGen/X86/vshift-3.ll @@ -28,7 +28,7 @@ define void @shift2b(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: ; CHECK: shift2b: ; CHECK: movd -; CHECK-NEXT: psrad +; CHECK: psrad %0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %1 = insertelement <4 x i32> %0, i32 %amt, i32 1 %2 = insertelement <4 x i32> %1, i32 %amt, i32 2 @@ -52,7 +52,7 @@ entry: ; CHECK: shift3b: ; CHECK: movzwl ; CHECK: movd -; CHECK-NEXT: psraw +; CHECK: psraw %0 = insertelement <8 x i16> undef, i16 %amt, i32 0 %1 = insertelement <8 x i16> %0, i16 %amt, i32 1 %2 = insertelement <8 x i16> %0, i16 %amt, i32 2 diff --git a/test/CodeGen/X86/vshift-5.ll b/test/CodeGen/X86/vshift-5.ll index cb254aeb57..f6c311dee5 100644 --- a/test/CodeGen/X86/vshift-5.ll +++ b/test/CodeGen/X86/vshift-5.ll @@ -6,7 +6,7 @@ define void @shift5a(<4 x 
i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { entry: ; CHECK: shift5a: ; CHECK: movd -; CHECK-NEXT: pslld +; CHECK: pslld %amt = load i32* %pamt %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer @@ -20,7 +20,7 @@ define void @shift5b(<4 x i32> %val, <4 x i32>* %dst, i32* %pamt) nounwind { entry: ; CHECK: shift5b: ; CHECK: movd -; CHECK-NEXT: psrad +; CHECK: psrad %amt = load i32* %pamt %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer @@ -34,7 +34,7 @@ define void @shift5c(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: ; CHECK: shift5c: ; CHECK: movd -; CHECK-NEXT: pslld +; CHECK: pslld %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer %shl = shl <4 x i32> %val, %shamt @@ -47,7 +47,7 @@ define void @shift5d(<4 x i32> %val, <4 x i32>* %dst, i32 %amt) nounwind { entry: ; CHECK: shift5d: ; CHECK: movd -; CHECK-NEXT: psrad +; CHECK: psrad %tmp0 = insertelement <4 x i32> undef, i32 %amt, i32 0 %shamt = shufflevector <4 x i32> %tmp0, <4 x i32> undef, <4 x i32> zeroinitializer %shr = ashr <4 x i32> %val, %shamt diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll index f55b184f3a..d86042a448 100644 --- a/test/CodeGen/X86/widen_arith-3.ll +++ b/test/CodeGen/X86/widen_arith-3.ll @@ -2,7 +2,6 @@ ; CHECK: incl ; CHECK: incl ; CHECK: incl -; CHECK: addl ; Widen a v3i16 to v8i16 to do a vector add diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll index 4330aae8ec..ebdfea9a37 100644 --- a/test/CodeGen/X86/widen_cast-1.ll +++ b/test/CodeGen/X86/widen_cast-1.ll @@ -1,7 +1,14 @@ -; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s +; RUN: llc -march=x86 -mcpu=generic -mattr=+sse42 < %s | FileCheck %s +; RUN: llc -march=x86 -mcpu=atom 
-mattr=+sse42 < %s | FileCheck -check-prefix=ATOM %s + ; CHECK: paddd -; CHECK: pextrd -; CHECK: movd +; CHECK: movl +; CHECK: movlpd + +; Scheduler causes produce a different instruction order +; ATOM: movl +; ATOM: paddd +; ATOM: movlpd ; bitcast a v4i16 to v2i32 diff --git a/test/CodeGen/X86/widen_cast-5.ll b/test/CodeGen/X86/widen_cast-5.ll index 136578df1e..9086d3a9cf 100644 --- a/test/CodeGen/X86/widen_cast-5.ll +++ b/test/CodeGen/X86/widen_cast-5.ll @@ -1,9 +1,8 @@ ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s ; CHECK: movl -; CHECK: movd +; CHECK: movlpd ; bitcast a i64 to v2i32 - define void @convert(<2 x i32>* %dst.addr, i64 %src) nounwind { entry: %conv = bitcast i64 %src to <2 x i32> diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll index 4aeec9136d..d5437281b2 100644 --- a/test/CodeGen/X86/widen_load-0.ll +++ b/test/CodeGen/X86/widen_load-0.ll @@ -1,18 +1,12 @@ ; RUN: llc < %s -o - -mtriple=x86_64-linux -mcpu=corei7 | FileCheck %s -; RUN: llc < %s -o - -mtriple=x86_64-win32 -mcpu=corei7 | FileCheck %s -check-prefix=WIN64 ; PR4891 ; Both loads should happen before either store. 
-; CHECK: movd ({{.*}}), {{.*}} -; CHECK: movd ({{.*}}), {{.*}} -; CHECK: movd {{.*}}, ({{.*}}) -; CHECK: movd {{.*}}, ({{.*}}) - -; WIN64: movd ({{.*}}), {{.*}} -; WIN64: movd ({{.*}}), {{.*}} -; WIN64: movd {{.*}}, ({{.*}}) -; WIN64: movd {{.*}}, ({{.*}}) +; CHECK: movl ({{.*}}), {{.*}} +; CHECK: movl ({{.*}}), {{.*}} +; CHECK: movl {{.*}}, ({{.*}}) +; CHECK: movl {{.*}}, ({{.*}}) define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind { entry: diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll index ddc4cab14a..996bfc40ee 100644 --- a/test/CodeGen/X86/xor.ll +++ b/test/CodeGen/X86/xor.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 -; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mcpu=corei7 -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mcpu=corei7 -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64 ; Though it is undefined, we want xor undef,undef to produce zero. define <4 x i32> @test1() nounwind { @@ -31,7 +31,7 @@ entry: ; X64: test3: ; X64: notl ; X64: andl -; X64: shrl %eax +; X64: shrl ; X64: ret ; X32: test3: |